library(haven)
library(tidyverse)
library(readxl)
library(tigris)
library(tidycensus)
library(sf)
library(leaflet)
library(gridExtra)
library(RColorBrewer)
library(kableExtra)
library(broom.mixed)
library(modelsummary)
library(gtsummary)
library(cowplot)
library(lme4)
library(survey)
library(srvyr)
options(tigris_use_cache = TRUE)
options(progress_enabled = FALSE)Another comparison of two area-level socioeconomic deprivation indices: SVI (Social Vulnerability Index) and ADI (Area Deprivation Index) and their associations with asthma and smoking outcomes from survey data
BMIN503/EPID600 Final Project
0.1 Packages
0.2 Functions
#' Generate custom scatter plot
#'
#' @param df Dataset
#' @param variable_x Variable on x axis
#' @param variable_y Variable on y axis
#'
#' @return Custom scatter plot with linear and polynomial smooths
#'
my_point_plot <- function(df,
variable_x,
variable_y) {
df |>
ggplot(aes(x = {{variable_x}}, y = {{variable_y}})) +
geom_point(alpha = 0.5, colour = "darkgrey") +
geom_smooth(
method = 'lm',
formula = y ~ poly(x, 2),
aes(color = 'polynomial'),
se = FALSE
) +
geom_smooth(
method = 'lm',
formula = y ~ x,
aes(color = 'linear'),
se = FALSE
) +
my_theme() +
theme(legend.position = "none")
}
#' Generate custom boxplot
#'
#' @param df Dataset
#' @param variable_x Variable on x axis
#' @param variable_y Variable on y axis
#'
#' @return Custom boxplot with shaded violin plot
#'
my_box_plot <- function(df,
variable_x,
variable_y){
df |>
ggplot(aes(x = {{variable_x}}, y = {{variable_y}})) +
my_theme() +
geom_violin(fill = "#95ccbb",
colour = NA,
alpha = 0.8) +
geom_boxplot(alpha = 0)
}
#' Custom ggplot theme
#'
#'
my_theme <- function() {
theme_half_open() +
theme(
panel.grid = element_line(color = "white"),
legend.key.size = unit(0.4, "cm"),
legend.text = element_text(size = 10),
legend.title = element_text(size = 12),
plot.title = element_text(size = 16)
)
}
# This function was taken from https://rdrr.io/github/bradisbrad/olfatbones/src/R/get_tri.R
#' Triangularize a Correlation Matrix
#' @description Takes a correlation matrix and removes the redundant information
#'
#' @param cormat Correlation Matrix
#' @param lower Lower or upper half (default = T) select F for upper half
#'
#' @return Matrix
#' @export get_tri
#'
#' @examples
#' cormat <- round(cor(mtcars), 2)
#' get_tri(cormat)
#'
get_tri <- function(cormat, lower = T){
if(lower){
cormat[upper.tri(cormat)] <- NA
} else {
cormat[lower.tri(cormat)] <- NA
}
cormat
}
#' Adapted from https://rpubs.com/cqj_00/785193
#' Compute correlations for pairs of variables using Spearman's correlation and plot using heatmap
#'
#' @param df Dataset restricted to only continuous variables
#'
#' @return Heatmap of Spearman's rank correlation coefficient for each combination of continous variables
#'
get_corr_plot <- function(df) {
combined_corrs <- df |>
cor(use = "complete.obs", method = "spearman") |>
as.matrix() |>
get_tri() |>
reshape2::melt(na.rm = TRUE) |>
rename(variable_1 = Var1,
variable_2 = Var2)
combined_corrs |>
ggplot(aes(x = variable_2, y = variable_1, fill = value)) +
geom_tile(color = "white") +
scale_fill_gradient2(
low = "blue",
high = "red",
mid = "grey100",
limit = c(-1, 0.999), # 0.999 so correlation of 1 appear grey
space = "Lab",
name = "Spearman\nCorrelation"
) +
geom_text(aes(label = round(value, 2))) +
my_theme() +
theme(
axis.text.x = element_text(
angle = 45,
vjust = 1,
size = 10,
hjust = 1
),
axis.text.y = element_text(size = 10)
) +
coord_fixed() +
coord_flip()
}
#' Compute weighted survey means for a variable
#'
#' @param smart_df SMART BRFSS dataset
#' @param smart_survey Survey design object for SMART BRFSS dataset (generated using srvyr::as_survey_design() )
#' @param variable_name Variable to compute as weighted survey means
#'
#' @return Table with weighted (prefixed with w_) and unweighted (prefixed with uw_) means for variable for each MMSA
#'
get_mmsa_survey_means <- function(smart_df,
smart_survey,
variable_name){
# get unweighted means by mmsa (uw_)
uw_mmsa_tbl <- smart_df |>
select(d_mmsa, mmsaname, {{variable_name}}) |>
group_by(d_mmsa, mmsaname) |>
summarise(across(starts_with(variable_name), ~ mean(.x, na.rm = TRUE), .names = "uw_{.col}"))
# get weighted means by mmsa using svyby (w_)
w_mmsa_tbl <- svyby(
~ temp_var_name,
~ d_mmsa,
design = smart_survey |> rename(temp_var_name = {{variable_name}}),
FUN = svymean,
na.rm = TRUE
) |>
rename_with( ~ str_replace(.x, "temp_var_name", paste0("w_", variable_name)))
# combine
ever_smoker_mmsa_tbl <- uw_mmsa_tbl |>
inner_join(w_mmsa_tbl, by = "d_mmsa") |>
rename_with( ~ str_replace(.x, "w_d_", "w_")) |>
select(- se)
}1 Overview
The goal of this project is to compare the Agency for Toxic Substances and Disease Registry Social Vulnerability Index (SVI) and the University of Wisconsin-developed Area Deprivation Index (ADI) in their association with asthma and smoking outcomes from the Behavioral Risk Factor Surveillance System (BRFSS) CDC dataset. This topic is interdisciplinary in that it spans data science, social science, epidemiology, and biomedical informatics.
For the asthma and smoking outcomes in the BRFSS CDC dataset, the 2021 Selected Metropolitan/Micropolitan Area Risk Trends (SMART) subset is used. The SMART BRFSS dataset provides the CBSA (Core-based statistical area), which “consist of the county or counties (or equivalent entities) associated with at least one core (urban area) of at least 10,000 population, plus adjacent counties having a high degree of social and economic integration with the core as measured through commuting ties.” https://www.census.gov/programs-surveys/metro-micro/about/glossary.html The terms CBSA and MMSA (Metropolitan or micropolitan statistical area) are used interchangeably in this report.
The SVI is developed by the Agency for Toxic Substances and Disease Registry as a index which identifies geographic areas with limited “ability to prevent human suffering and financial loss in the event of [natural or man-made] disaster”. The SVI can be provided as a national or within-state ranking provided at the county or census track level. For this project the most recent 2020 county-level national ranking is used. The SVI includes four themes that are combined into an overall measures. The SVI is discussed further in Section 3.2.
The ADI is developed by the University of Wisconsin as a measure of socioeconomic disadvantage which can be used to “inform health delivery and policy”. It was adapted from a measure originally developed by the Health Resources & Services Administration (HRSA). It is available at the census block group level as a national or within-state ranking. For the project, the most recent 2021 national ranking is used. The ADI is discussed further in Section 3.3.
To link and harmonize the SMART BRFSS, SVI and ADI datasets, census counts and shapefile data are required. A summary of the main datasets for this project is provided in Table 1. Each dataset and methodology for linkage and harmonization is presented in Section 3.
| Name | Year | Brief description | URL |
|---|---|---|---|
| SMART BRFSS | 2021 | Selected Metropolitan/Micropolitan Area Risk Trends (SMART) subset of Behavioral Risk Factor Surveillance System (BRFSS) | https://www.cdc.gov/brfss/smart/smart_2021.html |
| SVI | 2020 | Social Vulnerability Index (SVI) | https://www.atsdr.cdc.gov/placeandhealth/svi/data_documentation_download.html |
| ADI | 2021 | Area Deprivation Index (ADI) | https://www.neighborhoodatlas.medicine.wisc.edu/ |
Census shapefiles
|
2021 | Census boundaries and names | https://www.census.gov/geographies/mapping-files/time-series/geo/tiger-line-file.html (via tigris) |
Census counts
|
2020 | Decennial census counts | https://www.census.gov/data/developers/data-sets.html (via tidycensus) |
The overall topic for this project was shaped by feedback from Dr. Blanca Himes who suggested use of the SMART BRFSS dataset, inclusion of asthma as an outcome, and selection of SVI and ADI measures as the two area-level socioeconomic deprivation indices, as well as providing input on appropriate incorporation of survey weighting. Dr. Sherrie Xie provided guidance on overall approach and functions for linking and harmonizing across datasets.
2 Introduction
As Rollings et al 2023 discuss, there exist several area-level socioeconomic deprivation indices without consensus within healthcare research and policy fields about which indices should be used for a variety of analyses. In comparing two of the most frequently used indices (CITE), SVI and ADI, in their associations with asthma and smoking outcomes from the SMART BRFSS dataset, this project has the potential to inform appropriate measure selection for future work. This comparison closely relates to Rollings et al 2023, which compares SVI and ADI across the US at the census tract-level.1 Rollings et al 2023 provide a thorough comparison of the two indices, including intended purpose and construction methods, and examine correlations between the indices across census tracts. Their findings, which are targeted at informing public health research, practice and policy, are discussed further and related to this project in Section 5.
Asthma and smoking were selected as outcomes as they are well-studied and have reported associations with factors which relate to socioeconomic deprivation. This allows the findings of this project to be related back to the literature. Brief summaries of CDC-provided epidemiological data for asthma and smoking outcomes are provided in the paragraphs below.
For the asthma outcome, the SMART BRFSS variable for lifetime asthma prevalence _LTASTH1 is used. This variable identifies respondents who reported that they had been told by a doctor, nurse, or health professional that they had asthma. Asthma is a chronic disease of the lungs affecting 6.5% children and 8.0% adults in the US (https://www.cdc.gov/asthma/most_recent_national_asthma_data.htm, Xie et al 2018). A range of factors, such as low socioeconomic status and pollution, are known to have a relationship with asthma exacerbations (Xie at al 2018). For asthma prevalence, however, there is more tentative evidence that social and environmental factors are involved (Lotfata et al 2023). According to a summary of asthma prevalence in the US provided by the CDC for 2001-2021 https://www.cdc.gov/asthma/Asthma-Prevalence-US-2023-508.pdf, asthma prevalence is higher among adults compared to children, females compared to males, non-Hispanic Black persons compared to non-Hispanic White persons, other Hispanic compared to Mexican/Mexican American Hispanic persons, persons living in lower household income groups compared to higher household income groups and persons not living in large metropolitan statistical areas compared to persons living in large metropolitan statistical areas. Figure 1 provides a summary of asthma prevalence by age, sex, and race/ethnicity from https://www.cdc.gov/asthma/asthmadata.htm.
For the smoking outcome, the SMART BRFSS smoking status variable _SMOKER3is derived to identify respondents who reported having smoked at least 100 cigarettes in their lifetime. According to https://www.cdc.gov/tobacco/data_statistics/fact_sheets/adult_data/cig_smoking/index.htm, cigarette smoking is the leading cause of preventable disease, disability, and death in the US (citing https://www.ncbi.nlm.nih.gov/books/NBK179276/). For current cigarette smoking, rates are higher among men than women, highest among adults aged 45-64, highest among non-Hispanic adults from other racial groups compared to other race/ethnicity categories, highest among adults with low income, and highest in the Midwest (https://www.cdc.gov/tobacco/data_statistics/fact_sheets/adult_data/cig_smoking/index.htm). Current smoking status may have different distributions that the lifetime smoker variable outcome used for this project.
A challenge for this project, as with many projects which incorporate geospatial variables, is harmonizing and linking datasets with differing geographic units. SVI is available at the county-level, ADI is at the census block group-level, and the SMART BRFSS outcomes are at the individual-level, with individuals linked to CBSAs (Table 1). Fortunately, all of these geographic units are census-defined and are supersets or subsets of each other. Methods for linkage and harmonization are presented in Section 3. Correlations across SVI and ADI are examined at county (Section 4.1.1) and CBSA- levels (Section 4.2.1). For the area-level analyses, the combined dataset is harmonized at the county level and (quasi)Poisson regression analyses are applied to explore SVI and ADI as predictors of asthma and smoking rates at the county level (Section 4.1). For the individual-level analyses, binomial logistic regression analyses are applied to explore SVI and ADI measures as predictors of likelihood of asthma and smoking at the individual level (Section 4.2). A discussion of implications, limitations, and potential future directions for this project is provided in Section 5.
3 Methods
3.1 SMART BRFSS CDC dataset
The SMART BRFSS CDC dataset was retrieved from https://www.cdc.gov/brfss/smart/smart_2021.html. A unique identifier for each respondent is added. The dataset included labels which were extracted and manipulated to create a table of field names and descriptions. Missing variable descriptions were added based on CDC BRFSS documentation. Given that many of the variable names do not adhere to R’s requirements for naming (e.g. derived variables starting with underscores) and would require use of backticks, variable names are programmatically renamed. The convention of prefixing with d_ (for derived) for variable names started with underscores is adopted.
# MMSA2021.xpt retrieved from https://www.cdc.gov/brfss/smart/smart_2021.html
# Accompanying documentation available at https://www.cdc.gov/brfss/annual_data/2021/pdf/2021_smart_brfss_mmsa_methodology-508.pdf
MMSA2021 <- read_xpt("data/input_data/mmsa/MMSA2021_XPT/MMSA2021.xpt") |>
mutate(`_resp_id` = str_pad(row_number(), pad = 0, width = 6)) # add respondent idMMSA2021 |> glimpse()# construct table of variable names and labels
mmsa_colname_labels <- MMSA2021 |>
map_dfc(attr, "label") |>
pivot_longer(everything(),
names_to = "colname",
values_to = "label")
# include variable names without labels
mmsa_colnames_all <- MMSA2021 |>
colnames() |>
as_tibble() |>
rename(colname = value) |>
left_join(mmsa_colname_labels, by = join_by(colname))
# add missing labels documented in https://www.cdc.gov/brfss/annual_data/2021/pdf/2021_smart_brfss_mmsa_methodology-508.pdf
attr(MMSA2021[["_MMSA"]], "label") <- toupper("the code of metropolitan or micropolitan statistical area where the respondent lives")
attr(MMSA2021[["MMSANAME"]], "label") <- toupper("the MMSA name")
attr(MMSA2021[["_MMSAWT"]], "label") <- toupper("the MMSA-level weight that is used when generating MMSA-level estimates for variables in the data set")
attr(MMSA2021[["_resp_id"]], "label") <- toupper("respondent ID")
mmsa_colnames_all <- MMSA2021 |>
map_dfc(attr, "label") |>
pivot_longer(everything(),
names_to = "colname",
values_to = "label")# rename columns to make them easier to work with using R (not requiring ``)
# underscore prefix indicates derived or computed, replace with d_
smart_2021 <- MMSA2021 |>
rename_with(~ tolower(str_replace(.x, "^_", "d_"))) |>
mutate(d_mmsa_char = as.character(d_mmsa)) # make d_mmsa character
smart_2021 |> colnames() [1] "dispcode" "statere1" "celphon1" "ladult1" "colgsex"
[6] "landsex" "respslct" "safetime" "cadult1" "cellsex"
[11] "hhadult" "sexvar" "genhlth" "physhlth" "menthlth"
[16] "poorhlth" "priminsr" "persdoc3" "medcost1" "checkup1"
[21] "exerany2" "bphigh6" "bpmeds" "cholchk3" "toldhi3"
[26] "cholmed3" "cvdinfr4" "cvdcrhd4" "cvdstrk3" "asthma3"
[31] "asthnow" "chcscncr" "chcocncr" "chccopd3" "addepev3"
[36] "chckdny2" "diabete4" "diabage3" "havarth5" "arthexer"
[41] "arthedu" "lmtjoin3" "arthdis2" "joinpai2" "marital"
[46] "educa" "renthom1" "numhhol3" "numphon3" "cpdemo1b"
[51] "veteran3" "employ1" "children" "income3" "pregnant"
[56] "weight2" "height3" "deaf" "blind" "decide"
[61] "diffwalk" "diffdres" "diffalon" "smoke100" "smokday2"
[66] "usenow3" "ecignow1" "alcday5" "avedrnk3" "drnk3ge5"
[71] "maxdrnks" "flushot7" "flshtmy3" "imfvpla2" "pneuvac4"
[76] "hivtst7" "hivtstd3" "fruit2" "fruitju2" "fvgreen1"
[81] "frenchf1" "potatoe1" "vegetab2" "d_ststr" "d_impsex"
[86] "cageg" "d_rfhlth" "d_phys14d" "d_ment14d" "d_hlthpln"
[91] "d_hcvu652" "d_totinda" "d_rfhype6" "d_cholch3" "d_rfchol3"
[96] "d_michd" "d_ltasth1" "d_casthm1" "d_asthms1" "d_drdxar3"
[101] "d_lmtact3" "d_lmtwrk3" "d_prace1" "d_mrace1" "d_hispanc"
[106] "d_race" "d_raceg21" "d_racegr3" "d_raceprv" "d_sex"
[111] "d_ageg5yr" "d_age65yr" "d_age80" "d_age_g" "wtkg3"
[116] "d_bmi5" "d_bmi5cat" "d_rfbmi5" "d_educag" "d_incomg1"
[121] "d_smoker3" "d_rfsmok3" "d_cureci1" "drnkany5" "d_rfbing5"
[126] "d_drnkwk1" "d_rfdrhv7" "d_flshot7" "d_pneumo3" "d_aidtst4"
[131] "ftjuda2_" "frutda2_" "grenda1_" "frnchda_" "potada1_"
[136] "vegeda2_" "d_misfrt1" "d_misveg1" "d_frtres1" "d_vegres1"
[141] "d_frutsu1" "d_vegesu1" "d_frtlt1a" "d_veglt1a" "d_frt16a"
[146] "d_veg23a" "d_fruite1" "d_vegete1" "d_mmsa" "d_mmsawt"
[151] "seqno" "mmsaname" "d_resp_id" "d_mmsa_char"
smart_2021 |> glimpse()To map MMSAs to county for downstream analyses, it is necessary to retrieve the relevant delineation file (March 2020) from https://www.census.gov/geographies/reference-files/time-series/demo/metro-micro/historical-delineation-files.html. Most MMSAs mapped to CBSA. A subset of MMSAs which appear to correspond to some larger metropolitan areas mapped to a “Metropolitan Division Code”.
# https://www.cdc.gov/brfss/annual_data/2021/pdf/2021_smart_brfss_mmsa_methodology-508.pdf
# Typically, BRFSS data are used to produce state-level estimates; however, for the SMART
# project, BRFSS data were used to produce small area-level estimates for MMSAs as defined by
# the US Census Bureau. On June 6, 2003, OMB issued new definitions for MMSA and
# metropolitan divisions. OMB periodically updates the list of MMSAs. The list of areas used for this
# analysis can be found here: https://www.census.gov/geographies/reference-files/timeseries/demo/metro-micro/delineation-files.html. The March 2020 release was used for defining
# the MMSAs in the 2021 SMART data set.
# retrieved from https://www.census.gov/geographies/reference-files/time-series/demo/metro-micro/historical-delineation-files.html
delin_2020 <- read_xls("data/input_data/mmsa/list1_2020.xls",
range = "A3:L1919") |>
rename_with(~ tolower(gsub(" ", "_", .x, fixed = TRUE))) |>
rename_with(~ tolower(gsub("/", "_", .x, fixed = TRUE)))
delin_2020_cbsa_join <- delin_2020 |> filter(is.na(metropolitan_division_code) | cbsa_code %in% c("16980", "31080")) # CBSA maps
delin_2020_mdc_join <- delin_2020 |> filter(!is.na(metropolitan_division_code) & !cbsa_code %in% c("16980", "31080")) # Metropolitan division code maps
# map to CBSA
mmsa_county_mapping_cbsa <- smart_2021 |>
distinct(d_mmsa, d_mmsa, d_mmsa_char, mmsaname) |>
inner_join(delin_2020_cbsa_join, by = c("d_mmsa_char" = "cbsa_code")) |>
distinct(d_mmsa, d_mmsa_char, csa_title, county_county_equivalent, fips_state_code, fips_county_code)
# map to metropolitan division code
mmsa_county_mapping_mdc <- smart_2021 |>
distinct(d_mmsa, d_mmsa_char, d_mmsa_char, mmsaname) |>
inner_join(delin_2020_mdc_join, by = c("d_mmsa_char" = "metropolitan_division_code")) |>
distinct(d_mmsa, d_mmsa_char, csa_title, county_county_equivalent, fips_state_code, fips_county_code)
# combine
mmsa_county_mapping <- mmsa_county_mapping_cbsa |>
dplyr::union(mmsa_county_mapping_mdc) |>
distinct()# check mmsas retained
smart_2021 |> summarize(n_distinct(d_mmsa)) |> pull() == mmsa_county_mapping |> summarize(n_distinct(d_mmsa)) |> pull()
# check for issues with leading zeros
smart_2021 |>
filter(nchar(d_mmsa) != 5)
smart_2021 |>
mutate(d_mmsa_char = as.character(d_mmsa)) |>
filter(nchar(d_mmsa_char) != 5)
mmsa_county_mapping |>
filter(nchar(d_mmsa_char) != 5)
delin_2020 |>
filter(nchar(cbsa_code) != 5)# download US counties shapefile
tigris_counties <- counties()
# join mmsa county mapping with counties shapefile
mmsa_county_mapping_geoid <- mmsa_county_mapping |>
mutate(smart = TRUE) |>
inner_join(tigris_counties, by = c("fips_state_code"= "STATEFP", "fips_county_code" = "COUNTYFP")) |>
mutate(smart = replace_na(smart, FALSE))
# check mmsas retained
# mmsa_county_mapping_geoid |> filter(smart == TRUE) |> nrow() == mmsa_county_mapping |> nrow()
mmsa_county_mapping_geoid_sf <- mmsa_county_mapping_geoid |>
filter(!fips_state_code %in% c("02",
"15", "60",
"66", "69",
"72", "78")) |> # limit to contiguous states and DC
st_as_sf()# Change the CRS (Coordinate Reference System) to 4326 because leaflet expects the data provided to it to be in WGS84 (EPSG:4326)
mmsa_county_mapping_geoid_sf_4326 <- st_transform(mmsa_county_mapping_geoid_sf, crs = 4326)
mmsa_county_mapping_geoid_sf_4326 |> saveRDS("data/intermediate_data/mmsa_county_mapping_geoid_sf_4326.rds")mmsa_county_mapping_geoid_sf_4326 <- readRDS("data/intermediate_data/mmsa_county_mapping_geoid_sf_4326.rds")pal_fun <- colorFactor("orange", mmsa_county_mapping_geoid_sf_4326$smart)
# leaflet plot of of counties within mmsas included in smart brfss
mmsa_county_mapping_geoid_sf_4326 |>
st_simplify(dTolerance = 1e3) |>
leaflet() |>
addPolygons(
stroke = FALSE,# remove polygon borders
fillColor = ~ pal_fun(smart),
fillOpacity = 0.5,
smoothFactor = 0.5,
popup = mmsa_county_mapping_geoid_sf_4326$county_county_equivalent
) |>
addProviderTiles(providers$CartoDB.Voyager) |> # add third party provider tile
addScaleBar()3.1.1 Aggregating smoking and asthma variables at CBSA-level
smart_2021 <- smart_2021 |>
mutate(
# categorize as 1 for ever smoker is smoking status is current or former smoke, else 0 (including don't know/refused/missing)
d_ever_smoker = if_else(d_smoker3 %in% c(1, 2, 3), 1, 0),
# categorize as 1 for yes for lifetime prevalence, else 0 (including don't know/refused/missing)
d_ever_asthma = if_else(d_ltasth1 == 2, 1, 0)
)3.1.1.1 _SMOKER3
# https://www.r-bloggers.com/2020/02/dem-7283-example-1-survey-statistics-using-brfss-data/
# https://stackoverflow.com/questions/55975478/problems-due-to-having-too-many-single-psus-at-stage-one
options(survey.adjust.domain.lonely=TRUE)
options(survey.lonely.psu="adjust")
# TODO check this
# use provided survey weights
smart_2021_surv <- smart_2021 |>
as_survey_design(ids = 1,
strata = d_ststr,
weights = d_mmsawt)# Investigate survey weighting use wtkg3 as can apply general expectations about distributions
# WEIGHT IN KG [2 implied decimal places]
# TODO investigate "only one PSU at stage 1" warnings / confirm approach
wtkg3_mmsa_tbl <- get_mmsa_survey_means(smart_df = smart_2021,
smart_survey = smart_2021_surv,
variable_name = "wtkg3")
wtkg3_mmsa_tbl |>
pivot_longer(cols = c("w_wtkg3", "uw_wtkg3"),
values_to = "wtkg3") |>
ggplot(aes(x = wtkg3 / 100, colour = name)) + # divide by 100 for 2 implied decimal places
geom_density() +
my_theme()
wtkg3_mmsa_tbl |>
arrange(desc(w_wtkg3)) |>
head(5)
wtkg3_mmsa_tbl |>
arrange(w_wtkg3) |>
head(5)
pal_fun_weight <- colorNumeric("BuPu", NULL)
mmsa_county_mapping_geoid_sf_4326 |>
left_join(wtkg3_mmsa_tbl, by = "d_mmsa") |>
mutate(weight_kg = w_wtkg3/100) |>
st_simplify(dTolerance = 1e3) |>
leaflet() |>
addPolygons(
stroke = FALSE,
# remove polygon borders
fillColor = ~ pal_fun_weight(weight_kg),
fillOpacity = 0.5,
smoothFactor = 0.5,
# increase opacity and resolution
popup = mmsa_county_mapping_geoid_sf_4326$county_county_equivalent
) |>
addProviderTiles(providers$CartoDB.Voyager) |> # add third party provider tile
addLegend(
"bottomright",
# location of legend
pal = pal_fun_weight,
# palette function
values = ~ weight_kg,
# variable to pass to palette function
title = 'Mean weight (kg)',
# legend title
opacity = 1 # legend opacity (1 = completely opaque)
) |>
addScaleBar()ever_smoker_mmsa_tbl <- get_mmsa_survey_means(smart_df = smart_2021,
smart_survey = smart_2021_surv,
variable_name = "d_ever_smoker")ever_smoker_mmsa_tbl |>
arrange(desc(w_ever_smoker)) |>
head(5)
ever_smoker_mmsa_tbl |>
arrange(w_ever_smoker) |>
head(5)pal_fun_smoker <- colorNumeric("plasma", NULL, reverse = TRUE)
mmsa_county_mapping_geoid_sf_4326 |>
left_join(ever_smoker_mmsa_tbl, by = "d_mmsa") |>
st_simplify(dTolerance = 1e3) |>
leaflet() |>
addPolygons(
stroke = FALSE,
# remove polygon borders
fillColor = ~ pal_fun_smoker(w_ever_smoker),
fillOpacity = 0.5,
smoothFactor = 0.5,
# increase opacity and resolution
popup = mmsa_county_mapping_geoid_sf_4326$county_county_equivalent
) |>
addProviderTiles(providers$CartoDB.Voyager) |> # add third party provider tile
addLegend(
"bottomright",
# location of legend
pal = pal_fun_smoker,
# palette function
values = ~ w_ever_smoker,
# variable to pass to palette function
title = 'Proportion smoker',
# legend title
opacity = 1 # legend opacity (1 = completely opaque)
) |>
addScaleBar()3.1.1.2 _LTASTH1
ever_asthma_mmsa_tbl <- get_mmsa_survey_means(smart_df = smart_2021,
smart_survey = smart_2021_surv,
variable_name = "d_ever_asthma")ever_asthma_mmsa_tbl |>
arrange(desc(w_ever_asthma)) |>
head(5)
ever_asthma_mmsa_tbl |>
arrange(w_ever_asthma) |>
head(5)pal_fun_asthma <- colorNumeric("viridis", NULL, reverse = TRUE)
mmsa_county_mapping_geoid_sf_4326 |>
left_join(ever_asthma_mmsa_tbl, by = "d_mmsa") |>
st_simplify(dTolerance = 1e3) |>
leaflet() |>
addPolygons(
stroke = FALSE,
# remove polygon borders
fillColor = ~ pal_fun_asthma(w_ever_asthma),
fillOpacity = 0.5,
smoothFactor = 0.5,
# increase opacity and resolution
popup = mmsa_county_mapping_geoid_sf_4326$county_county_equivalent
) |>
addProviderTiles(providers$CartoDB.Voyager) |> # add third party provider tile
addLegend(
"bottomright",
# location of legend
pal = pal_fun_asthma,
# palette function
values = ~ w_ever_asthma,
# variable to pass to palette function
title = 'Proportion asthma',
# legend title
opacity = 1 # legend opacity (1 = completely opaque)
) |>
addScaleBar()3.2 SVI dataset
| Field name | Description |
|---|---|
| RPL_THEME1 | Socioeconomic Status, incorporating information such as
|
| RPL_THEME2 | Household Characteristics, incorporating information such as
|
| RPL_THEME3 | Racial & Ethnic Minority Status Percentile percentage minority (Hispanic or Latino (of any race); Black and African American,Not Hispanic or Latino; American Indian and Alaska Native, Not Hispanic or Latino; Asian, Not Hispanic or Latino; Native Hawaiian and Other Pacific Islander, Not Hispanic or Latino; Two or More Races, Not Hispanic or Latino; Other Races, Not Hispanic or Latino) estimate |
| RPL_THEME4 | Housing Type & Transportation, incorporating information such as
|
| RPL_THEMES | Overall summary ranking |
# retrieved from https://www.atsdr.cdc.gov/placeandhealth/svi/data_documentation_download.html
# documentation here https://www.atsdr.cdc.gov/placeandhealth/svi/documentation/pdf/SVI2020Documentation_08.05.22.pdf
svi2020 <- read_csv("data/input_data/svi/SVI_2020_US_county.csv")svi2020 |> glimpse()
svi2020 |>
select(starts_with("RPL")) |>
summary()
svi2020 |>
select(starts_with("RPL")) |>
get_corr_plot()svi2020_sf <- svi2020 |>
# select summary theme ranking variables
select(starts_with("RPL_"), FIPS) |>
# join with county shapefile
left_join(tigris_counties, by = c("FIPS" = "GEOID")) |>
# convert to sf
st_as_sf()
svi2020_sf_4326 <- st_transform(svi2020_sf, crs = 4326)
svi2020_sf_4326 |> saveRDS("data/intermediate_data/svi2020_sf_4326.rds")svi2020_sf_4326 <-
readRDS("data/intermediate_data/svi2020_sf_4326.rds") |>
filter(!STATEFP %in% c("02",
"15", "60",
"66", "69",
"72", "78")) # limit to contiguous states and DCpal_svi <- colorNumeric("inferno", NULL, reverse = TRUE)
svi2020_sf_4326 |>
filter(!is.na(FUNCSTAT)) |> # TODO explore why this is necessary
st_simplify(dTolerance = 1e3) |>
leaflet() |>
addPolygons(
stroke = FALSE,
# remove polygon borders
fillColor = ~ pal_svi(RPL_THEMES),
fillOpacity = 0.5,
smoothFactor = 0.5
# increase opacity and resolution
) |>
addProviderTiles(providers$CartoDB.Voyager) |> # add third party provider tile
addLegend(
"bottomright",
# location of legend
pal = pal_svi,
# palette function
values = ~ RPL_THEMES,
# variable to pass to palette function
title = 'SVI',
# legend title
opacity = 1 # legend opacity (1 = completely opaque)
) |>
addScaleBar()3.2.1 SVI at CBSA-level
# get 2020 census counts by county for computing weighted average
county_cts <- get_decennial(
geography = "county",
variables = "P1_001N",
year = 2020,
output = "wide"
) |>
rename(county_ct = P1_001N)
county_cts |> saveRDS("data/intermediate_data/county_cts.RDS")
# computed county mmsa weights for county-level
county_mmsa_wts <- tigris_counties |>
select(STATEFP, COUNTYFP , GEOID) |>
st_drop_geometry() |>
inner_join(county_cts, by = "GEOID") |>
right_join(
mmsa_county_mapping,
by = c("STATEFP" = "fips_state_code",
"COUNTYFP" = "fips_county_code")
) |>
group_by(d_mmsa) |>
# mmsa_pop
mutate(mmsa_pop = sum(county_ct)) |>
ungroup() |>
# county_ct pop / mmsa_pop
mutate(county_mmsa_wt = county_ct/mmsa_pop) |>
arrange(GEOID)
county_mmsa_wts |> saveRDS("data/intermediate_data/county_mmsa_wts.RDS")county_mmsa_wts <- readRDS("data/intermediate_data/county_mmsa_wts.RDS")
svi2020_sf_4326 <- readRDS("data/intermediate_data/svi2020_sf_4326.rds")
# compute weighted average for svi at county-lev
mmsa_svi1 <- svi2020_sf_4326 |>
st_drop_geometry() |>
select(starts_with("RPL"), FIPS) |>
inner_join(county_mmsa_wts, by = c("FIPS" = "GEOID")) |>
mutate(across(starts_with("RPL"), as.numeric)) |>
# RPL theme ranking * county_mmsa_wt
mutate(across(starts_with("RPL"), ~ (.x * county_mmsa_wt), .names = "{.col}_wts"))
# unique(mmsa_svi1$RPL_THEME1 * mmsa_svi1$county_mmsa_wt == mmsa_svi1$RPL_THEME1_wts) # check matches
# unique(mmsa_svi1$RPL_THEMES * mmsa_svi1$county_mmsa_wt == mmsa_svi1$RPL_THEMES_wts) # check matches
mmsa_svi2 <- mmsa_svi1 |>
group_by(d_mmsa) |>
# weighted average sum of weighted svis
summarize(across(ends_with("_wts"), sum, .names = "mmsa_{.col}")) |>
rename_with(~ str_remove(.x, "_wts"))
mmsa_svi2 |> saveRDS("data/intermediate_data/mmsa_svi.RDS")mmsa_svi <- readRDS("data/intermediate_data/mmsa_svi.RDS")
# join mmsa county mapping with mmsa-level svi
svi2021_mmsa_sf_4326 <- mmsa_county_mapping_geoid_sf_4326 |>
left_join(mmsa_svi, by = "d_mmsa")
svi2021_mmsa_sf_4326 |> saveRDS("data/intermediate_data/svi2021_mmsa_sf_4326.rds")svi2021_mmsa_sf_4326 <- readRDS("data/intermediate_data/svi2021_mmsa_sf_4326.rds")
pal_svi <- colorNumeric("inferno", NULL, reverse = TRUE)
svi2021_mmsa_sf_4326 |>
st_simplify(dTolerance = 1e3) |>
leaflet() |>
addPolygons(
stroke = FALSE,
# remove polygon borders
fillColor = ~ pal_svi(mmsa_RPL_THEMES),
fillOpacity = 0.5,
smoothFactor = 0.5
# increase opacity and resolution
) |>
addProviderTiles(providers$CartoDB.Voyager) |> # add third party provider tile
addLegend(
"bottomright",
# location of legend
pal = pal_svi,
# palette function
values = ~ mmsa_RPL_THEMES,
# variable to pass to palette function
title = 'svi',
# legend title
opacity = 1 # legend opacity (1 = completely opaque)
) |>
addScaleBar()3.3 ADI dataset
ADI_NATRANK: National percentile of block group ADI score Numeric Ranking (1-100) or Suppression Codes:
GQ (Group Quarters) - Greater than 33.3% of Housing Units are Group Quarters
PH (Population/Housing) - Population less than 100 and/or fewer than 30 housing units
GQ-PH (Group Quarters and Population Housing) - Both GQ and PH conditions are met (see above)
QDI (Questionable Data Integrity) - Block Groups missing a key demographic factor for ADI construction
# TODO scale ADI to match SVI
# retrieved from https://www.neighborhoodatlas.medicine.wisc.edu/
adi2021 <- read_csv("data/input_data/adi/adi-download/US_2021_ADI_Census_Block_Group_v4_0_1.csv")
tigris_cbg <- block_groups(cb = TRUE) |>
filter(!STATEFP %in% c("02",
"15", "60",
"66", "69",
"72", "78"))adi2021 |> glimpse()adi2021_sf <- adi2021 |>
inner_join(tigris_cbg, by = c("FIPS" = "GEOID")) |>
st_as_sf()
adi2021_cbg_sf_4326 <- st_transform(adi2021_sf, crs = 4326)
adi2021_cbg_sf_4326 |> saveRDS("data/intermediate_data/adi2021_cbg_sf_4326.rds")3.3.1 ADI at county-level
adi_no_cbg <- adi2021 |>
anti_join(tigris_cbg, by = c("FIPS" = "GEOID"))
cbg_no_adi <- tigris_cbg |>
anti_join(adi2021, by = c("GEOID" = "FIPS"))
# When a Census block group falls into one or more of the suppression criteria mentioned above the ADI rank is replaced with a code describing the suppression reason. Three possible codes will appear in the ADI field: "PH" for suppression due to low population and/or housing, "GQ" for suppression due to a high group quarters population, and "PH-GQ" for suppression due to both types of suppression criteria. A code of "QDI" designates block groups without an ADI due to Questionable Data Integrity, stemming from missing data in the source ACS data.contig_state_vctr <- datasets::state.abb |>
setdiff(c("AK", "HI"))
# https://walker-data.com/umich-workshop-2022/intro-2020-census/#1
# get cbg counts to compute cbg weights for aggregating adi at county-level
cbg_counts <- list()
for (contig_state in contig_state_vctr) {
cbg_counts[[contig_state]] <- get_decennial(
geography = "block group",
variables = "P1_001N",
state = contig_state,
year = 2020,
output = "wide"
) |>
mutate(state_abb = contig_state)
}
cbg_counts |> saveRDS("data/intermediate_data/cbg_counts.RDS")cbg_counts <- readRDS("data/intermediate_data/cbg_counts.RDS") |>
bind_rows()
# computed cbg weights for county-level
cbg_wts <- tigris_cbg |>
select(STATEFP, COUNTYFP, TRACTCE, BLKGRPCE, GEOID) |>
st_drop_geometry() |>
inner_join(cbg_counts, by = "GEOID") |>
group_by(STATEFP, COUNTYFP) |>
# county pop
mutate(county_pop = sum(P1_001N)) |>
ungroup() |>
# cbg pop / county pop
mutate(cbg_wt = P1_001N/county_pop) |>
arrange(GEOID)
cbg_wts |> saveRDS("data/intermediate_data/cbg_wts.RDS")cbg_wts <- readRDS("data/intermediate_data/cbg_wts.RDS")
adi2021_cbg_sf_4326 <- readRDS("data/intermediate_data/adi2021_cbg_sf_4326.rds")
# compute weighted average for adi at county-level
county_adi <- adi2021_cbg_sf_4326 |>
st_drop_geometry() |>
select(-STATEFP, -COUNTYFP, -TRACTCE, -BLKGRPCE, -NAME) |>
inner_join(cbg_wts, by = c("FIPS" = "GEOID")) |>
mutate(ADI_NATRANK = as.numeric(ADI_NATRANK)) |>
filter(!is.na(ADI_NATRANK)) |>
# national adi rank * cbg wt
mutate(adi_wts = ADI_NATRANK * cbg_wt) |>
group_by(STATEFP, COUNTYFP) |>
# weighted average sum of weighted adis
summarize(county_adi_value = sum(adi_wts)) |>
ungroup()
county_adi |> saveRDS("data/intermediate_data/county_adi.RDS")
# cbg_counts_comb <- cbg_counts |>
# bind_rows()
# cbg_counts_comb |> ggplot(aes(x = P1_001N)) + geom_histogram()county_adi <- readRDS("data/intermediate_data/county_adi.RDS") |>
mutate(FIPS = paste0(STATEFP, COUNTYFP)) |>
select(-STATEFP,-COUNTYFP) |>
left_join(tigris_counties, by = c("FIPS" = "GEOID")) |>
st_as_sf()
adi2021_county_sf_4326 <- st_transform(county_adi, crs = 4326)
adi2021_county_sf_4326 |> saveRDS("data/intermediate_data/adi2021_county_sf_4326.rds")adi2021_county_sf_4326 <- readRDS("data/intermediate_data/adi2021_county_sf_4326.rds")
pal_adi <- colorNumeric("inferno", NULL, reverse = TRUE)
adi2021_county_sf_4326 |>
st_simplify(dTolerance = 1e3) |>
leaflet() |>
addPolygons(
stroke = FALSE,
# remove polygon borders
fillColor = ~ pal_adi(county_adi_value),
fillOpacity = 0.5,
smoothFactor = 0.5
# increase opacity and resolution
) |>
addProviderTiles(providers$CartoDB.Voyager) |> # add third party provider tile
addLegend(
"bottomright",
# location of legend
pal = pal_adi,
# palette function
values = ~ county_adi_value,
# variable to pass to palette function
title = 'adi',
# legend title
opacity = 1 # legend opacity (1 = completely opaque)
) |>
addScaleBar()3.3.2 ADI at CBSA-level
cbg_counts <- readRDS("data/intermediate_data/cbg_counts.RDS") |>
bind_rows()
# computed cbg weights for mmsa-level
cbg_mmsa_wts <- tigris_cbg |>
select(STATEFP, COUNTYFP, TRACTCE, BLKGRPCE, GEOID) |>
st_drop_geometry() |>
inner_join(cbg_counts, by = "GEOID") |>
right_join(mmsa_county_mapping, by = c("STATEFP" = "fips_state_code",
"COUNTYFP" = "fips_county_code")) |>
group_by(d_mmsa) |>
# mmsa pop
mutate(mmsa_pop = sum(P1_001N)) |>
ungroup() |>
# cbg pop / mmsa pop
mutate(cbg_mmsa_wt = P1_001N/mmsa_pop) |>
arrange(GEOID)
cbg_mmsa_wts |> saveRDS("data/intermediate_data/cbg_mmsa_wts.RDS")cbg_mmsa_wts <- readRDS("data/intermediate_data/cbg_mmsa_wts.RDS")
adi2021_cbg_sf_4326 <- readRDS("data/intermediate_data/adi2021_cbg_sf_4326.rds")
# compute weighted average for adi at county-level
mmsa_adi <- adi2021_cbg_sf_4326 |>
st_drop_geometry() |>
select(-STATEFP, -COUNTYFP, -TRACTCE, -BLKGRPCE, -NAME) |>
inner_join(cbg_mmsa_wts, by = c("FIPS" = "GEOID")) |>
mutate(ADI_NATRANK = as.numeric(ADI_NATRANK)) |>
filter(!is.na(ADI_NATRANK)) |>
# national adi rank * cbg_mmsa wt
mutate(adi_wts = ADI_NATRANK * cbg_mmsa_wt) |>
group_by(d_mmsa) |>
# weighted average sum of weighted adis
summarize(mmsa_adi_value = sum(adi_wts, na.rm = TRUE))
mmsa_adi |> saveRDS("data/intermediate_data/mmsa_adi.RDS")
# cbg_mmsa_counts_comb <- cbg_mmsa_counts |>
# bind_rows()
# cbg_mmsa_counts_comb |> ggplot(aes(x = P1_001N)) + geom_histogram()mmsa_adi <- readRDS("data/intermediate_data/mmsa_adi.RDS")
adi2021_mmsa_sf_4326 <- mmsa_county_mapping_geoid_sf_4326 |>
left_join(mmsa_adi, by = "d_mmsa")
adi2021_mmsa_sf_4326 |> saveRDS("data/intermediate_data/adi2021_mmsa_sf_4326.rds")adi2021_mmsa_sf_4326 <- readRDS("data/intermediate_data/adi2021_mmsa_sf_4326.rds")
pal_adi <- colorNumeric("inferno", NULL, reverse = TRUE)
adi2021_mmsa_sf_4326 |>
st_simplify(dTolerance = 1e3) |>
leaflet() |>
addPolygons(
stroke = FALSE,
# remove polygon borders
fillColor = ~ pal_adi(mmsa_adi_value),
fillOpacity = 0.5,
smoothFactor = 0.5
# increase opacity and resolution
) |>
addProviderTiles(providers$CartoDB.Voyager) |> # add third party provider tile
addLegend(
"bottomright",
# location of legend
pal = pal_adi,
# palette function
values = ~ mmsa_adi_value,
# variable to pass to palette function
title = 'adi',
# legend title
opacity = 1 # legend opacity (1 = completely opaque)
) |>
addScaleBar()county_adi <- readRDS("data/intermediate_data/county_adi.RDS")
# compare SVI and ADI across entire US
svi_adi_county <- county_adi |>
mutate(FIPS = paste0(STATEFP, COUNTYFP)) |>
select(FIPS, county_adi_value) |>
st_drop_geometry() |>
inner_join(svi2020_sf_4326 |> st_drop_geometry(),
by = "FIPS") |>
select(FIPS, county_adi_value, starts_with("RPL_"), NAME)
svi_adi_county_corr_plot <- svi_adi_county |>
select(-NAME, -FIPS) |>
select(RPL_THEME1,
RPL_THEME2,
RPL_THEME3,
RPL_THEME4,
RPL_THEMES,
county_adi_value) |>
rename(svi_t1_ses = RPL_THEME1, # Socioeconomic Status - RPL_THEME1
svi_t2_household = RPL_THEME2, # Household Characteristics - RPL_THEME2
svi_t3_raceethnicity = RPL_THEME3, # Racial & Ethnic Minority Status - RPL_THEME3
svi_t4_housingtransport = RPL_THEME4, # Housing Type & Transportation - RPL_THEME4
svi_overall = RPL_THEMES) |>
get_corr_plot()
svi_adi_county_corr_plotggsave(filename = "local/svi_adi_county_corr_plot.png",
width = 6,
height = 4)
svi_adi_county |>
my_point_plot(RPL_THEME3, county_adi_value)3.4 Combining datasets
3.4.1 Area-level analyses
Harmonize at county-level
# incorporate county counts for Poisson regression analyses
county_cts <- readRDS("data/intermediate_data/county_cts.RDS")
area_combined_dataset <- mmsa_county_mapping_geoid_sf_4326 |>
select(d_mmsa, county_county_equivalent, fips_state_code, fips_county_code) |>
left_join(select(ever_asthma_mmsa_tbl, -mmsaname), by = "d_mmsa") |>
left_join(select(ever_smoker_mmsa_tbl, -mmsaname), by = "d_mmsa") |>
mutate(GEOID = paste0(fips_state_code, fips_county_code)) |>
left_join(county_cts, by = "GEOID") |>
st_join(select(svi2020_sf_4326, starts_with("RPL")),
left = TRUE,
join = st_equals) |>
st_join(select(adi2021_county_sf_4326, county_adi_value),
left = TRUE,
join = st_equals) |>
mutate(
n_w_ever_smoker = floor(w_ever_smoker * county_ct),
n_uw_ever_smoker = floor(uw_ever_smoker * county_ct),
n_w_ever_asthma = floor(w_ever_asthma * county_ct),
n_uw_ever_asthma = floor(uw_ever_asthma * county_ct),
county_adi_value = county_adi_value/100 # divide by 100 to match scale of SVI
) |>
rename(svi_t1_ses = RPL_THEME1, # Socioeconomic Status - RPL_THEME1
svi_t2_household = RPL_THEME2, # Household Characteristics - RPL_THEME2
svi_t3_raceethnicity = RPL_THEME3, # Racial & Ethnic Minority Status - RPL_THEME3
svi_t4_housingtransport = RPL_THEME4, # Housing Type & Transportation - RPL_THEME4
svi_overall = RPL_THEMES)
area_combined_dataset |> saveRDS("data/final_data/area_combined_dataset.rds")3.4.2 Individual-level analyses
Harmonize at CBSA-level
mmsa_adi <- readRDS("data/intermediate_data/mmsa_adi.RDS")
mmsa_svi <- readRDS("data/intermediate_data/mmsa_svi.RDS")
indiv_combined_dataset <- smart_2021 |>
mutate(
# NA for missing ever smoker (different than area-level approach)
d_ever_smoker = factor(case_when(d_smoker3 %in% c(1, 2, 3) ~ 1,
d_smoker3 == 4 ~ 0,
TRUE ~ NA)),
d_ever_asthma = factor(case_when(d_ltasth1 == 1 ~ 0,
d_ltasth1 == 2 ~ 1,
TRUE ~ NA)),
# NA for missing ever asthma (different than area-level approach)
d_sex = factor(if_else(d_sex == 1, "male", "female")),
d_race_eth = factor(
case_when(
d_race == 1 ~ "White only, non-Hispanic",
d_race == 2 ~ "Black only, non-Hispanic",
d_race == 3 ~ "American Indian or Alaskan Native only",
d_race == 4 ~ "Asian only, non-Hispanic",
d_race == 5 ~ "Native Hawaiian or other Pacific Islander only",
d_race == 6 ~ "Other race only, non-Hispanic",
d_race == 7 ~ "Multiracial, non-Hispanic",
d_race == 8 ~ "Hispanic",
d_race == 9 ~ "Don’t know/Not sure/Refused",
TRUE ~ NA
),
levels = c(
"White only, non-Hispanic",
"Black only, non-Hispanic",
"American Indian or Alaskan Native only",
"Asian only, non-Hispanic",
"Native Hawaiian or other Pacific Islander only",
"Other race only, non-Hispanic",
"Multiracial, non-Hispanic",
"Hispanic",
"Don’t know/Not sure/Refused"
)
),
d_imput_age_group = factor(
case_when(
d_age_g == 1 ~ "18–24",
d_age_g == 2 ~ "25–34",
d_age_g == 3 ~ "35–44",
d_age_g == 4 ~ "45–54",
d_age_g == 5 ~ "55–64",
d_age_g == 6 ~ "65–99",
TRUE ~ NA
)
)
) |>
select(d_resp_id,
d_ever_smoker,
d_ever_asthma,
d_sex,
d_race_eth,
d_imput_age_group,
d_mmsa,
d_ststr,
d_mmsawt) |>
left_join(mmsa_adi, by = "d_mmsa") |>
left_join(mmsa_svi, by = "d_mmsa") |>
mutate(d_mmsa = factor(d_mmsa),
mmsa_adi_value = mmsa_adi_value/100 # divide by 100 to match scale of SVI
) |>
rename(mmsa_svi_t1_ses = mmsa_RPL_THEME1, # Socioeconomic Status - RPL_THEME1
mmsa_svi_t2_household = mmsa_RPL_THEME2, # Household Characteristics - RPL_THEME2
mmsa_svi_t3_raceethnicity = mmsa_RPL_THEME3, # Racial & Ethnic Minority Status - RPL_THEME3
mmsa_svi_t4_housingtransport = mmsa_RPL_THEME4, # Housing Type & Transportation - RPL_THEME4
mmsa_svi_overall = mmsa_RPL_THEMES)
# TODO should survey design be used here?
indiv_combined_dataset_surv <- indiv_combined_dataset |>
as_survey_design(ids = 1,
strata = d_ststr,
weights = d_mmsawt)
# check contrasts for intercepts
contrasts(indiv_combined_dataset$d_sex)
contrasts(indiv_combined_dataset$d_imput_age_group)
contrasts(indiv_combined_dataset$d_race_eth)
indiv_combined_dataset |> saveRDS("data/final_data/indiv_combined_dataset.rds")4 Results
4.1 Area-level analyses
4.1.1 Relationship between ADI and SVI at area-level
area_combined_dataset <-
readRDS("data/final_data/area_combined_dataset.rds") |>
st_drop_geometry()
# benefited from approach here https://rpubs.com/cqj_00/785193
area_combined_corr_plot <- area_combined_dataset |>
select(
w_ever_asthma,
w_ever_smoker,
svi_t1_ses,
svi_t2_household,
svi_t3_raceethnicity,
svi_t4_housingtransport,
svi_overall,
county_adi_value,
county_ct
) |>
get_corr_plot()
area_combined_corr_plotggsave(filename = "local/area_combined_corr_plot.png",
width = 6, height = 4)
area_combined_corr_plot2 <- area_combined_dataset |>
select(
svi_t1_ses,
svi_t2_household,
svi_t3_raceethnicity,
svi_t4_housingtransport,
svi_overall,
county_adi_value
) |>
get_corr_plot()
ggsave(filename = "local/area_combined_corr_plot2.png",
width = 6, height = 4)
# area_combined_dataset |>
# my_point_plot(svi_t3_raceethnicity,
# county_adi_value)4.1.2
svi_adi_model <-
glm(county_adi_value ~ svi_t1_ses + svi_t2_household + svi_t3_raceethnicity + svi_t4_housingtransport + county_ct,
family = gaussian(),
data = area_combined_dataset)
modelsummary(
list(
"svi_adi_model" = svi_adi_model
),
coef_omit = "Intercept",
statistic = c("p = {p.value}"),
estimate = "{estimate} [{conf.low}, {conf.high}]"
)| svi_adi_model | |
|---|---|
| svi_t1_ses | 0.365 [0.313, 0.416] |
| p = <0.001 | |
| svi_t2_household | 0.258 [0.211, 0.304] |
| p = <0.001 | |
| svi_t3_raceethnicity | −0.419 [−0.462, −0.377] |
| p = <0.001 | |
| svi_t4_housingtransport | 0.034 [−0.006, 0.075] |
| p = 0.098 | |
| county_ct | 0.000 [0.000, 0.000] |
| p = <0.001 | |
| Num.Obs. | 652 |
| R2 | 0.603 |
| AIC | −960.5 |
| BIC | −929.1 |
| Log.Lik. | 487.252 |
| RMSE | 0.11 |
4.1.3 SVI and ADI as predictors of smoking at area-level
area_combined_dataset |>
ggplot(aes(x = w_ever_smoker)) +
geom_density() +
my_theme()plot_grid(
area_combined_dataset |>
my_point_plot(w_ever_smoker,
county_adi_value),
area_combined_dataset |>
my_point_plot(w_ever_smoker,
svi_t1_ses),
area_combined_dataset |>
my_point_plot(w_ever_smoker,
svi_t2_household),
area_combined_dataset |>
my_point_plot(w_ever_smoker,
svi_t3_raceethnicity),
area_combined_dataset |>
my_point_plot(w_ever_smoker,
svi_t4_housingtransport),
area_combined_dataset |>
my_point_plot(w_ever_smoker,
svi_t2_household)
)# remove outliers
smoking_dataset_cleaned <- area_combined_dataset |>
filter(w_ever_smoker >= 0.2)
# exploring modelling approaches..
# benefited from https://rpubs.com/kaz_yos/poisson
smoking_quasibinom_adi_only <-
glm(w_ever_smoker ~ county_adi_value,
family = quasibinomial(),
data = smoking_dataset_cleaned)
smoking_quasipoisson_adi_only <-
glm(
n_w_ever_smoker ~ county_adi_value,
offset = log(county_ct),
family = quasipoisson(link = "log"),
data = smoking_dataset_cleaned
)
smoking_quasibinom <-
glm(w_ever_smoker ~ svi_t1_ses + svi_t2_household + svi_t3_raceethnicity + svi_t4_housingtransport + county_adi_value,
family = quasibinomial(),
data = smoking_dataset_cleaned)
smoking_quasipoisson <-
glm(
n_w_ever_smoker ~ svi_t1_ses + svi_t2_household + svi_t3_raceethnicity + svi_t4_housingtransport + county_adi_value,
offset = log(county_ct),
family = quasipoisson(link = "log"),
data = smoking_dataset_cleaned
)
modelsummary(
list(
"smoking_quasibinom" = smoking_quasibinom,
"smoking_quasibinom_adi_only" = smoking_quasibinom_adi_only,
"smoking_quasipoisson" = smoking_quasipoisson,
"smoking_quasipoisson_adi_only" = smoking_quasipoisson_adi_only
),
coef_omit = "Intercept",
statistic = c("p = {p.value}"),
estimate = "{estimate} [{conf.low}, {conf.high}]",
exponentiate = TRUE
)| smoking_quasibinom | smoking_quasibinom_adi_only | smoking_quasipoisson | smoking_quasipoisson_adi_only | |
|---|---|---|---|---|
| svi_t1_ses | 0.864 [0.793, 0.941] | 0.859 [0.814, 0.906] | ||
| p = <0.001 | p = <0.001 | |||
| svi_t2_household | 0.970 [0.901, 1.046] | 1.025 [0.971, 1.082] | ||
| p = 0.431 | p = 0.369 | |||
| svi_t3_raceethnicity | 0.793 [0.733, 0.857] | 0.791 [0.741, 0.844] | ||
| p = <0.001 | p = <0.001 | |||
| svi_t4_housingtransport | 1.128 [1.063, 1.197] | 1.088 [1.045, 1.134] | ||
| p = <0.001 | p = <0.001 | |||
| county_adi_value | 1.924 [1.725, 2.146] | 1.905 [1.760, 2.063] | 1.692 [1.585, 1.807] | 1.749 [1.662, 1.841] |
| p = <0.001 | p = <0.001 | p = <0.001 | p = <0.001 | |
| Num.Obs. | 650 | 650 | 650 | 650 |
| RMSE | 0.04 | 0.04 | 18409.26 | 27740.71 |
tbl_regression(smoking_quasipoisson, exponentiate = TRUE) |>
bold_p(t = 0.05) |>
bold_labels() |>
italicize_levels()| Characteristic | IRR1 | 95% CI1 | p-value |
|---|---|---|---|
| svi_t1_ses | 0.86 | 0.81, 0.91 | <0.001 |
| svi_t2_household | 1.03 | 0.97, 1.08 | 0.4 |
| svi_t3_raceethnicity | 0.79 | 0.74, 0.84 | <0.001 |
| svi_t4_housingtransport | 1.09 | 1.05, 1.13 | <0.001 |
| county_adi_value | 1.69 | 1.59, 1.81 | <0.001 |
| 1 IRR = Incidence Rate Ratio, CI = Confidence Interval | |||
4.1.4 SVI and ADI as predictors of asthma at area-level
area_combined_dataset |>
ggplot(aes(x = w_ever_asthma)) +
geom_density() +
my_theme()plot_grid(
area_combined_dataset |>
my_point_plot(w_ever_asthma,
county_adi_value),
area_combined_dataset |>
my_point_plot(w_ever_asthma,
svi_t1_ses),
area_combined_dataset |>
my_point_plot(w_ever_asthma,
svi_t2_household),
area_combined_dataset |>
my_point_plot(w_ever_asthma,
svi_t3_raceethnicity),
area_combined_dataset |>
my_point_plot(w_ever_asthma,
svi_t4_housingtransport),
area_combined_dataset |>
my_point_plot(w_ever_asthma,
svi_t2_household)
)# remove outliers
asthma_dataset_cleaned <- area_combined_dataset
# exploring modelling approaches..
# benefitted from https://rpubs.com/kaz_yos/poisson
asthma_quasibinom <-
glm(w_ever_asthma ~ svi_t1_ses + svi_t2_household + svi_t3_raceethnicity + svi_t4_housingtransport + county_adi_value,
family = quasibinomial(),
data = asthma_dataset_cleaned)
asthma_quasipoisson <-
glm(
n_w_ever_asthma ~ svi_t1_ses + svi_t2_household + svi_t3_raceethnicity + svi_t4_housingtransport + county_adi_value,
offset = log(county_ct),
family = quasipoisson(link = "log"),
data = asthma_dataset_cleaned
)
asthma_quasibinom_adi_only <-
glm(w_ever_asthma ~ county_adi_value,
family = quasibinomial(),
data = asthma_dataset_cleaned)
asthma_quasipoisson_adi_only <-
glm(
n_w_ever_asthma ~ county_adi_value,
offset = log(county_ct),
family = quasipoisson(link = "log"),
data = asthma_dataset_cleaned
)
modelsummary(
list(
"asthma_quasibinom" = asthma_quasibinom,
"asthma_quasibinom_adi_only" = asthma_quasibinom_adi_only,
"asthma_quasipoisson" = asthma_quasipoisson,
"asthma_quasipoisson_adi_only" = asthma_quasipoisson_adi_only
),
coef_omit = "Intercept",
statistic = c("p = {p.value}"),
estimate = "{estimate} [{conf.low}, {conf.high}]",
exponentiate = TRUE
)| asthma_quasibinom | asthma_quasibinom_adi_only | asthma_quasipoisson | asthma_quasipoisson_adi_only | |
|---|---|---|---|---|
| svi_t1_ses | 1.033 [0.959, 1.111] | 0.862 [0.812, 0.916] | ||
| p = 0.394 | p = <0.001 | |||
| svi_t2_household | 0.967 [0.907, 1.031] | 1.084 [1.019, 1.152] | ||
| p = 0.300 | p = 0.010 | |||
| svi_t3_raceethnicity | 0.870 [0.813, 0.930] | 0.808 [0.749, 0.871] | ||
| p = <0.001 | p = <0.001 | |||
| svi_t4_housingtransport | 1.030 [0.979, 1.084] | 1.167 [1.114, 1.223] | ||
| p = 0.257 | p = <0.001 | |||
| county_adi_value | 0.987 [0.899, 1.083] | 1.041 [0.977, 1.108] | 0.997 [0.926, 1.073] | 1.053 [0.999, 1.110] |
| p = 0.776 | p = 0.215 | p = 0.933 | p = 0.055 | |
| Num.Obs. | 652 | 652 | 652 | 652 |
| RMSE | 0.02 | 0.02 | 9632.58 | 11360.56 |
tbl_regression(asthma_quasipoisson, exponentiate = TRUE) |>
bold_p(t = 0.05) |>
bold_labels() |>
italicize_levels()| Characteristic | IRR1 | 95% CI1 | p-value |
|---|---|---|---|
| svi_t1_ses | 0.86 | 0.81, 0.92 | <0.001 |
| svi_t2_household | 1.08 | 1.02, 1.15 | 0.010 |
| svi_t3_raceethnicity | 0.81 | 0.75, 0.87 | <0.001 |
| svi_t4_housingtransport | 1.17 | 1.11, 1.22 | <0.001 |
| county_adi_value | 1.00 | 0.93, 1.07 | >0.9 |
| 1 IRR = Incidence Rate Ratio, CI = Confidence Interval | |||
4.2 Individual-level analyses
4.2.1 Relationship between ADI and SVI at individual-level
indiv_combined_dataset <- readRDS("data/final_data/indiv_combined_dataset.rds")
# benefited from approach here https://rpubs.com/cqj_00/785193
indiv_combined_corr_plot <- indiv_combined_dataset |>
select(-d_sex, -d_race_eth, -d_imput_age_group, -d_mmsa, -d_ever_smoker, -d_ever_asthma, -d_resp_id) |>
get_corr_plot()
indiv_combined_corr_plotggsave(filename = "local/indiv_combined_corr_plot.png",
width = 6, height = 4)4.2.2
svi_adi_model <-
glm(mmsa_adi_value ~ mmsa_svi_t1_ses + mmsa_svi_t2_household + mmsa_svi_t3_raceethnicity + mmsa_svi_t4_housingtransport,
family = gaussian(),
data = indiv_combined_dataset)
# TODO check these relationship hold across the US (not just MMSAs)
modelsummary(
list(
"svi_adi_model" = svi_adi_model
),
coef_omit = "Intercept",
statistic = c("p = {p.value}"),
estimate = "{estimate} [{conf.low}, {conf.high}]"
)| svi_adi_model | |
|---|---|
| mmsa_svi_t1_ses | 0.161 [0.157, 0.165] |
| p = <0.001 | |
| mmsa_svi_t2_household | 0.693 [0.690, 0.696] |
| p = <0.001 | |
| mmsa_svi_t3_raceethnicity | −0.782 [−0.785, −0.779] |
| p = <0.001 | |
| mmsa_svi_t4_housingtransport | −0.073 [−0.076, −0.070] |
| p = <0.001 | |
| Num.Obs. | 223183 |
| R2 | 0.709 |
| AIC | −405100.4 |
| BIC | −405038.5 |
| Log.Lik. | 202556.200 |
| RMSE | 0.10 |
4.2.3 SVI and ADI as predictors of smoking at individual-level
smoking_individual_dataset_cleaned <- indiv_combined_dataset |>
filter(!is.na(d_ever_smoker))
plot_grid(smoking_individual_dataset_cleaned |>
my_box_plot(d_ever_smoker,
mmsa_adi_value) +
ylab("ADI") +
ylim(0, 1),
smoking_individual_dataset_cleaned |>
my_box_plot(d_ever_smoker,
mmsa_svi_t1_ses) +
ylab("SVI 1 (SES)") +
ylim(0, 1),
smoking_individual_dataset_cleaned |>
my_box_plot(d_ever_smoker,
mmsa_svi_t2_household) +
ylab("SVI 2 (Household)") +
ylim(0, 1),
smoking_individual_dataset_cleaned |>
my_box_plot(d_ever_smoker,
mmsa_svi_t3_raceethnicity) +
ylab("SVI 3 (Race/Eth)") +
ylim(0, 1),
smoking_individual_dataset_cleaned |>
my_box_plot(d_ever_smoker,
mmsa_svi_t4_housingtransport) +
ylab("SVI 4 (Housing/Transp)") +
ylim(0, 1),
smoking_individual_dataset_cleaned |>
my_box_plot(d_ever_smoker,
mmsa_svi_overall) +
ylab("SVI Overall") +
ylim(0, 1)
)ggsave(filename = "local/smoking_plot_grid.png")# smoking_individual_dataset_cleaned |> summarize(n_distinct(d_resp_id)) == smoking_individual_dataset_cleaned |> nrow()
# smoking_individual_dataset_cleaned |> summarize(n_distinct(d_resp_id)) == smoking_individual_dataset_cleaned |> nrow()
smoking_individual_dataset_cleaned |>
group_by(d_race_eth, d_sex) |>
summarize(n_ever_smoker = sum(d_ever_smoker == 1),
n_total = n_distinct(d_resp_id)) |>
ungroup() |>
mutate(prop = n_ever_smoker / n_total) |>
ggplot(aes(
x = fct_reorder(d_race_eth, prop),
y = prop,
label = round(prop, digits = 2),
fill = d_sex,
group = d_sex)) +
geom_col(position = position_dodge()) +
my_theme() +
theme(legend.position = "bottom") +
coord_flip() +
xlab("d_race_eth") +
ylab("proportion d_ever_smoker") +
ylim(0, 0.7) +
scale_fill_manual(values = c("female" = "#eb6223", male = "#512892"))ggsave(filename = "local/smoking_sex_raceeth.png",
width = 6, height = 4)
smoking_individual_dataset_cleaned |>
group_by(d_sex, d_imput_age_group) |>
summarize(n_ever_smoker = sum(d_ever_smoker == 1),
n_total = n_distinct(d_resp_id)) |>
ungroup() |>
mutate(prop = n_ever_smoker / n_total) |>
ggplot(aes(
x = d_imput_age_group,
y = prop,
label = round(prop, digits = 2),
colour = d_sex,
group = d_sex
)) +
geom_line(linewidth = 0.8) +
geom_point(size = 2) +
my_theme() +
theme(legend.position = "none") +
xlab("d_imput_age_group") +
ylab("proportion d_ever_smoker") +
ylim(0, 0.7) +
scale_colour_manual(values = c("female" = "#eb6223", male = "#512892"))ggsave(filename = "local/smoking_sex_age.png",
width = 8, height = 3)smoking_binomial_adi_only <-
glm(
d_ever_smoker ~ d_sex * d_race_eth + d_imput_age_group + mmsa_adi_value,
family = binomial(),
data = smoking_individual_dataset_cleaned
)
smoking_binomial <-
glm(
d_ever_smoker ~ d_sex * d_race_eth + d_imput_age_group + mmsa_svi_t1_ses + mmsa_svi_t2_household + mmsa_svi_t3_raceethnicity + mmsa_svi_t4_housingtransport + mmsa_adi_value,
family = binomial(),
data = smoking_individual_dataset_cleaned
)
modelsummary(
list(
"smoking_binomial_adi_only" = smoking_binomial_adi_only,
"smoking_binomial" = smoking_binomial
),
coef_omit = "Intercept",
statistic = c("p = {p.value}"),
estimate = "{estimate} [{conf.low}, {conf.high}]",
exponentiate = TRUE
)| smoking_binomial_adi_only | smoking_binomial | |
|---|---|---|
| d_sexmale | 1.302 [1.275, 1.330] | 1.303 [1.276, 1.330] |
| p = <0.001 | p = <0.001 | |
| d_race_ethBlack only, non-Hispanic | 0.690 [0.661, 0.719] | 0.692 [0.663, 0.723] |
| p = <0.001 | p = <0.001 | |
| d_race_ethAmerican Indian or Alaskan Native only | 1.814 [1.585, 2.076] | 1.806 [1.578, 2.067] |
| p = <0.001 | p = <0.001 | |
| d_race_ethAsian only, non-Hispanic | 0.241 [0.210, 0.274] | 0.237 [0.207, 0.271] |
| p = <0.001 | p = <0.001 | |
| d_race_ethNative Hawaiian or other Pacific Islander only | 0.818 [0.577, 1.142] | 0.809 [0.570, 1.130] |
| p = 0.248 | p = 0.223 | |
| d_race_ethOther race only, non-Hispanic | 0.925 [0.806, 1.060] | 0.911 [0.794, 1.044] |
| p = 0.266 | p = 0.181 | |
| d_race_ethMultiracial, non-Hispanic | 1.286 [1.169, 1.414] | 1.283 [1.166, 1.411] |
| p = <0.001 | p = <0.001 | |
| d_race_ethHispanic | 0.538 [0.511, 0.566] | 0.528 [0.501, 0.557] |
| p = <0.001 | p = <0.001 | |
| d_race_ethDon’t know/Not sure/Refused | 0.800 [0.732, 0.875] | 0.795 [0.727, 0.869] |
| p = <0.001 | p = <0.001 | |
| d_imput_age_group25–34 | 2.881 [2.720, 3.054] | 2.886 [2.724, 3.059] |
| p = <0.001 | p = <0.001 | |
| d_imput_age_group35–44 | 4.261 [4.029, 4.509] | 4.274 [4.041, 4.522] |
| p = <0.001 | p = <0.001 | |
| d_imput_age_group45–54 | 3.866 [3.657, 4.089] | 3.873 [3.664, 4.096] |
| p = <0.001 | p = <0.001 | |
| d_imput_age_group55–64 | 4.907 [4.645, 5.186] | 4.906 [4.644, 5.185] |
| p = <0.001 | p = <0.001 | |
| d_imput_age_group65–99 | 5.532 [5.245, 5.838] | 5.532 [5.245, 5.838] |
| p = <0.001 | p = <0.001 | |
| mmsa_adi_value | 1.913 [1.817, 2.013] | 1.954 [1.781, 2.145] |
| p = <0.001 | p = <0.001 | |
| d_sexmale × d_race_ethBlack only, non-Hispanic | 1.267 [1.189, 1.351] | 1.263 [1.185, 1.346] |
| p = <0.001 | p = <0.001 | |
| d_sexmale × d_race_ethAmerican Indian or Alaskan Native only | 0.915 [0.755, 1.110] | 0.914 [0.754, 1.109] |
| p = 0.369 | p = 0.361 | |
| d_sexmale × d_race_ethAsian only, non-Hispanic | 2.472 [2.119, 2.892] | 2.476 [2.123, 2.898] |
| p = <0.001 | p = <0.001 | |
| d_sexmale × d_race_ethNative Hawaiian or other Pacific Islander only | 0.952 [0.608, 1.498] | 0.956 [0.611, 1.504] |
| p = 0.831 | p = 0.844 | |
| d_sexmale × d_race_ethOther race only, non-Hispanic | 1.203 [1.004, 1.443] | 1.206 [1.006, 1.446] |
| p = 0.045 | p = 0.043 | |
| d_sexmale × d_race_ethMultiracial, non-Hispanic | 1.091 [0.954, 1.246] | 1.088 [0.952, 1.243] |
| p = 0.203 | p = 0.217 | |
| d_sexmale × d_race_ethHispanic | 1.739 [1.624, 1.864] | 1.742 [1.625, 1.866] |
| p = <0.001 | p = <0.001 | |
| d_sexmale × d_race_ethDon’t know/Not sure/Refused | 1.248 [1.109, 1.405] | 1.251 [1.112, 1.409] |
| p = <0.001 | p = <0.001 | |
| mmsa_svi_t1_ses | 1.273 [1.169, 1.387] | |
| p = <0.001 | ||
| mmsa_svi_t2_household | 0.835 [0.758, 0.919] | |
| p = <0.001 | ||
| mmsa_svi_t3_raceethnicity | 0.890 [0.808, 0.980] | |
| p = 0.017 | ||
| mmsa_svi_t4_housingtransport | 1.226 [1.147, 1.311] | |
| p = <0.001 | ||
| Num.Obs. | 209306 | 209306 |
| AIC | 267917.1 | 267813.7 |
| BIC | 268163.2 | 268100.7 |
| Log.Lik. | −133934.560 | −133878.827 |
| RMSE | 0.47 | 0.47 |
tbl_regression(smoking_binomial, exponentiate = TRUE) |>
bold_p(t = 0.05) |>
bold_labels() |>
italicize_levels()| Characteristic | OR1 | 95% CI1 | p-value |
|---|---|---|---|
| d_sex | |||
| female | — | — | |
| male | 1.30 | 1.28, 1.33 | <0.001 |
| d_race_eth | |||
| White only, non-Hispanic | — | — | |
| Black only, non-Hispanic | 0.69 | 0.66, 0.72 | <0.001 |
| American Indian or Alaskan Native only | 1.81 | 1.58, 2.07 | <0.001 |
| Asian only, non-Hispanic | 0.24 | 0.21, 0.27 | <0.001 |
| Native Hawaiian or other Pacific Islander only | 0.81 | 0.57, 1.13 | 0.2 |
| Other race only, non-Hispanic | 0.91 | 0.79, 1.04 | 0.2 |
| Multiracial, non-Hispanic | 1.28 | 1.17, 1.41 | <0.001 |
| Hispanic | 0.53 | 0.50, 0.56 | <0.001 |
| Don’t know/Not sure/Refused | 0.80 | 0.73, 0.87 | <0.001 |
| d_imput_age_group | |||
| 18–24 | — | — | |
| 25–34 | 2.89 | 2.72, 3.06 | <0.001 |
| 35–44 | 4.27 | 4.04, 4.52 | <0.001 |
| 45–54 | 3.87 | 3.66, 4.10 | <0.001 |
| 55–64 | 4.91 | 4.64, 5.19 | <0.001 |
| 65–99 | 5.53 | 5.24, 5.84 | <0.001 |
| mmsa_svi_t1_ses | 1.27 | 1.17, 1.39 | <0.001 |
| mmsa_svi_t2_household | 0.83 | 0.76, 0.92 | <0.001 |
| mmsa_svi_t3_raceethnicity | 0.89 | 0.81, 0.98 | 0.017 |
| mmsa_svi_t4_housingtransport | 1.23 | 1.15, 1.31 | <0.001 |
| mmsa_adi_value | 1.95 | 1.78, 2.14 | <0.001 |
| d_sex * d_race_eth | |||
| male * Black only, non-Hispanic | 1.26 | 1.18, 1.35 | <0.001 |
| male * American Indian or Alaskan Native only | 0.91 | 0.75, 1.11 | 0.4 |
| male * Asian only, non-Hispanic | 2.48 | 2.12, 2.90 | <0.001 |
| male * Native Hawaiian or other Pacific Islander only | 0.96 | 0.61, 1.50 | 0.8 |
| male * Other race only, non-Hispanic | 1.21 | 1.01, 1.45 | 0.043 |
| male * Multiracial, non-Hispanic | 1.09 | 0.95, 1.24 | 0.2 |
| male * Hispanic | 1.74 | 1.63, 1.87 | <0.001 |
| male * Don’t know/Not sure/Refused | 1.25 | 1.11, 1.41 | <0.001 |
| 1 OR = Odds Ratio, CI = Confidence Interval | |||
4.2.4 SVI and ADI as predictors of asthma at individual-level
asthma_individual_dataset_cleaned <- indiv_combined_dataset |>
filter(!is.na(d_ever_asthma),!is.na(d_ever_asthma))
plot_grid(asthma_individual_dataset_cleaned |>
my_box_plot(d_ever_asthma,
mmsa_adi_value) +
ylab("ADI") +
ylim(0, 1),
asthma_individual_dataset_cleaned |>
my_box_plot(d_ever_asthma,
mmsa_svi_t1_ses) +
ylab("SVI 1 (SES)") +
ylim(0, 1),
asthma_individual_dataset_cleaned |>
my_box_plot(d_ever_asthma,
mmsa_svi_t2_household) +
ylab("SVI 2 (Household)") +
ylim(0, 1),
asthma_individual_dataset_cleaned |>
my_box_plot(d_ever_asthma,
mmsa_svi_t3_raceethnicity) +
ylab("SVI 3 (Race/Eth)") +
ylim(0, 1),
asthma_individual_dataset_cleaned |>
my_box_plot(d_ever_asthma,
mmsa_svi_t4_housingtransport) +
ylab("SVI 4 (Housing/Transp)") +
ylim(0, 1),
asthma_individual_dataset_cleaned |>
my_box_plot(d_ever_asthma,
mmsa_svi_overall) +
ylab("SVI Overall") +
ylim(0, 1)
)ggsave(filename = "local/asthma_plot_grid.png")# asthma_individual_dataset_cleaned |> summarize(n_distinct(d_resp_id)) == asthma_individual_dataset_cleaned |> nrow()
asthma_individual_dataset_cleaned |>
group_by(d_race_eth, d_sex) |>
summarize(n_ever_asthma = sum(d_ever_asthma == 1),
n_total = n_distinct(d_resp_id)) |>
ungroup() |>
mutate(prop = n_ever_asthma / n_total) |>
ggplot(aes(
x = fct_reorder(d_race_eth, prop),
y = prop,
label = round(prop, digits = 2),
fill = d_sex,
group = d_sex)) +
geom_col(position = position_dodge()) +
my_theme() +
theme(legend.position = "bottom") +
coord_flip() +
xlab("d_race_eth") +
ylab("proportion d_ever_asthma") +
ylim(0, 0.3) +
scale_fill_manual(values = c("female" = "#eb6223", male = "#512892"))ggsave(filename = "local/asthma_sex_raceeth.png",
width = 6, height = 4)
asthma_individual_dataset_cleaned |>
group_by(d_sex, d_imput_age_group) |>
summarize(n_ever_asthma = sum(d_ever_asthma == 1),
n_total = n_distinct(d_resp_id)) |>
ungroup() |>
mutate(prop = n_ever_asthma / n_total) |>
ggplot(aes(
x = d_imput_age_group,
y = prop,
label = round(prop, digits = 2),
colour = d_sex,
group = d_sex
)) +
geom_line(linewidth = 0.8) +
geom_point(size = 2) +
my_theme() +
theme(legend.position = "none") +
xlab("d_imput_age_group") +
ylab("proportion d_ever_asthma")+
ylim(0, 0.3) +
scale_colour_manual(values = c("female" = "#eb6223", male = "#512892"))ggsave(filename = "local/asthma_sex_age.png",
width = 8, height = 3)asthma_binomial_adi_only <-
glm(
d_ever_asthma ~ d_ever_smoker + d_sex + d_race_eth + d_imput_age_group + d_sex:d_race_eth + d_sex:d_imput_age_group + mmsa_adi_value,
family = binomial(),
data = asthma_individual_dataset_cleaned
)
asthma_binomial <-
glm(
d_ever_asthma ~ d_ever_smoker + d_sex + d_race_eth + d_imput_age_group + d_sex:d_race_eth + d_sex:d_imput_age_group + mmsa_svi_t1_ses + mmsa_svi_t2_household + mmsa_svi_t3_raceethnicity + mmsa_svi_t4_housingtransport + mmsa_adi_value,
family = binomial(),
data = asthma_individual_dataset_cleaned
)
modelsummary(
list(
"asthma_binomial_adi_only" = asthma_binomial_adi_only,
"asthma_binomial" = asthma_binomial
),
coef_omit = "Intercept",
statistic = c("p = {p.value}"),
estimate = "{estimate} [{conf.low}, {conf.high}]",
exponentiate = TRUE
)| asthma_binomial_adi_only | asthma_binomial | |
|---|---|---|
| d_ever_smoker1 | 1.261 [1.229, 1.294] | 1.259 [1.227, 1.292] |
| p = <0.001 | p = <0.001 | |
| d_sexmale | 1.026 [0.936, 1.124] | 1.028 [0.938, 1.127] |
| p = 0.586 | p = 0.551 | |
| d_race_ethBlack only, non-Hispanic | 1.127 [1.072, 1.185] | 1.151 [1.093, 1.211] |
| p = <0.001 | p = <0.001 | |
| d_race_ethAmerican Indian or Alaskan Native only | 1.374 [1.169, 1.607] | 1.376 [1.171, 1.610] |
| p = <0.001 | p = <0.001 | |
| d_race_ethAsian only, non-Hispanic | 0.538 [0.470, 0.613] | 0.540 [0.471, 0.615] |
| p = <0.001 | p = <0.001 | |
| d_race_ethNative Hawaiian or other Pacific Islander only | 0.975 [0.636, 1.442] | 0.976 [0.636, 1.443] |
| p = 0.904 | p = 0.905 | |
| d_race_ethOther race only, non-Hispanic | 1.160 [0.981, 1.364] | 1.157 [0.979, 1.361] |
| p = 0.077 | p = 0.082 | |
| d_race_ethMultiracial, non-Hispanic | 1.765 [1.590, 1.955] | 1.773 [1.598, 1.965] |
| p = <0.001 | p = <0.001 | |
| d_race_ethHispanic | 0.920 [0.868, 0.975] | 0.921 [0.867, 0.977] |
| p = 0.005 | p = 0.006 | |
| d_race_ethDon’t know/Not sure/Refused | 1.025 [0.915, 1.145] | 1.026 [0.916, 1.147] |
| p = 0.663 | p = 0.652 | |
| d_imput_age_group25–34 | 0.967 [0.894, 1.048] | 0.969 [0.895, 1.049] |
| p = 0.412 | p = 0.438 | |
| d_imput_age_group35–44 | 0.855 [0.791, 0.924] | 0.856 [0.793, 0.925] |
| p = <0.001 | p = <0.001 | |
| d_imput_age_group45–54 | 0.878 [0.814, 0.948] | 0.880 [0.816, 0.950] |
| p = <0.001 | p = 0.001 | |
| d_imput_age_group55–64 | 0.847 [0.786, 0.914] | 0.847 [0.786, 0.914] |
| p = <0.001 | p = <0.001 | |
| d_imput_age_group65–99 | 0.632 [0.588, 0.680] | 0.632 [0.588, 0.680] |
| p = <0.001 | p = <0.001 | |
| mmsa_adi_value | 0.949 [0.885, 1.018] | 0.759 [0.669, 0.861] |
| p = 0.142 | p = <0.001 | |
| d_sexmale × d_race_ethBlack only, non-Hispanic | 1.005 [0.923, 1.094] | 1.001 [0.919, 1.089] |
| p = 0.914 | p = 0.990 | |
| d_sexmale × d_race_ethAmerican Indian or Alaskan Native only | 0.849 [0.659, 1.090] | 0.847 [0.658, 1.087] |
| p = 0.201 | p = 0.195 | |
| d_sexmale × d_race_ethAsian only, non-Hispanic | 0.950 [0.787, 1.146] | 0.952 [0.788, 1.149] |
| p = 0.591 | p = 0.605 | |
| d_sexmale × d_race_ethNative Hawaiian or other Pacific Islander only | 1.036 [0.580, 1.847] | 1.038 [0.581, 1.850] |
| p = 0.903 | p = 0.899 | |
| d_sexmale × d_race_ethOther race only, non-Hispanic | 0.758 [0.589, 0.973] | 0.757 [0.588, 0.972] |
| p = 0.030 | p = 0.030 | |
| d_sexmale × d_race_ethMultiracial, non-Hispanic | 0.759 [0.646, 0.891] | 0.759 [0.646, 0.890] |
| p = <0.001 | p = <0.001 | |
| d_sexmale × d_race_ethHispanic | 0.894 [0.816, 0.980] | 0.894 [0.815, 0.979] |
| p = 0.016 | p = 0.016 | |
| d_sexmale × d_race_ethDon’t know/Not sure/Refused | 0.919 [0.777, 1.086] | 0.918 [0.776, 1.085] |
| p = 0.322 | p = 0.317 | |
| d_sexmale × d_imput_age_group25–34 | 0.749 [0.671, 0.836] | 0.748 [0.670, 0.836] |
| p = <0.001 | p = <0.001 | |
| d_sexmale × d_imput_age_group35–44 | 0.682 [0.611, 0.761] | 0.682 [0.611, 0.760] |
| p = <0.001 | p = <0.001 | |
| d_sexmale × d_imput_age_group45–54 | 0.512 [0.458, 0.571] | 0.511 [0.458, 0.570] |
| p = <0.001 | p = <0.001 | |
| d_sexmale × d_imput_age_group55–64 | 0.520 [0.467, 0.579] | 0.519 [0.466, 0.578] |
| p = <0.001 | p = <0.001 | |
| d_sexmale × d_imput_age_group65–99 | 0.634 [0.573, 0.702] | 0.633 [0.572, 0.701] |
| p = <0.001 | p = <0.001 | |
| mmsa_svi_t1_ses | 1.209 [1.075, 1.358] | |
| p = 0.001 | ||
| mmsa_svi_t2_household | 1.090 [0.955, 1.244] | |
| p = 0.203 | ||
| mmsa_svi_t3_raceethnicity | 0.673 [0.590, 0.768] | |
| p = <0.001 | ||
| mmsa_svi_t4_housingtransport | 1.122 [1.024, 1.230] | |
| p = 0.014 | ||
| Num.Obs. | 208542 | 208542 |
| AIC | 168171.3 | 168132.0 |
| BIC | 168478.7 | 168480.4 |
| Log.Lik. | −84055.651 | −84031.986 |
| RMSE | 0.35 | 0.35 |
tbl_regression(asthma_binomial, exponentiate = TRUE) |>
bold_p(t = 0.05) |>
bold_labels() |>
italicize_levels()| Characteristic | OR1 | 95% CI1 | p-value |
|---|---|---|---|
| d_ever_smoker | |||
| 0 | — | — | |
| 1 | 1.26 | 1.23, 1.29 | <0.001 |
| d_sex | |||
| female | — | — | |
| male | 1.03 | 0.94, 1.13 | 0.6 |
| d_race_eth | |||
| White only, non-Hispanic | — | — | |
| Black only, non-Hispanic | 1.15 | 1.09, 1.21 | <0.001 |
| American Indian or Alaskan Native only | 1.38 | 1.17, 1.61 | <0.001 |
| Asian only, non-Hispanic | 0.54 | 0.47, 0.62 | <0.001 |
| Native Hawaiian or other Pacific Islander only | 0.98 | 0.64, 1.44 | >0.9 |
| Other race only, non-Hispanic | 1.16 | 0.98, 1.36 | 0.082 |
| Multiracial, non-Hispanic | 1.77 | 1.60, 1.96 | <0.001 |
| Hispanic | 0.92 | 0.87, 0.98 | 0.006 |
| Don’t know/Not sure/Refused | 1.03 | 0.92, 1.15 | 0.7 |
| d_imput_age_group | |||
| 18–24 | — | — | |
| 25–34 | 0.97 | 0.90, 1.05 | 0.4 |
| 35–44 | 0.86 | 0.79, 0.93 | <0.001 |
| 45–54 | 0.88 | 0.82, 0.95 | 0.001 |
| 55–64 | 0.85 | 0.79, 0.91 | <0.001 |
| 65–99 | 0.63 | 0.59, 0.68 | <0.001 |
| mmsa_svi_t1_ses | 1.21 | 1.08, 1.36 | 0.001 |
| mmsa_svi_t2_household | 1.09 | 0.95, 1.24 | 0.2 |
| mmsa_svi_t3_raceethnicity | 0.67 | 0.59, 0.77 | <0.001 |
| mmsa_svi_t4_housingtransport | 1.12 | 1.02, 1.23 | 0.014 |
| mmsa_adi_value | 0.76 | 0.67, 0.86 | <0.001 |
| d_sex * d_race_eth | |||
| male * Black only, non-Hispanic | 1.00 | 0.92, 1.09 | >0.9 |
| male * American Indian or Alaskan Native only | 0.85 | 0.66, 1.09 | 0.2 |
| male * Asian only, non-Hispanic | 0.95 | 0.79, 1.15 | 0.6 |
| male * Native Hawaiian or other Pacific Islander only | 1.04 | 0.58, 1.85 | 0.9 |
| male * Other race only, non-Hispanic | 0.76 | 0.59, 0.97 | 0.030 |
| male * Multiracial, non-Hispanic | 0.76 | 0.65, 0.89 | <0.001 |
| male * Hispanic | 0.89 | 0.82, 0.98 | 0.016 |
| male * Don’t know/Not sure/Refused | 0.92 | 0.78, 1.08 | 0.3 |
| d_sex * d_imput_age_group | |||
| male * 25–34 | 0.75 | 0.67, 0.84 | <0.001 |
| male * 35–44 | 0.68 | 0.61, 0.76 | <0.001 |
| male * 45–54 | 0.51 | 0.46, 0.57 | <0.001 |
| male * 55–64 | 0.52 | 0.47, 0.58 | <0.001 |
| male * 65–99 | 0.63 | 0.57, 0.70 | <0.001 |
| 1 OR = Odds Ratio, CI = Confidence Interval | |||
4.3 Summary of analyses
5 Conclusion
5.1 Implications
“Index purpose, construction, and application matter” (Rollings et al 2023, pg. 11)
“Index differences affect how deprivation is defined and have implications for how individual indices are used and interpreted, especially within the context of health equity (Rollings et al 2023, pg. 11)
Indicators of deprivation can vary by geographic region (Rollings et al 2023, pg. 12)
Correlations between individual adn area varying For example, for the individual-level analyses, aggregating at the CBSA-level collapses across variation that may be predictive Interesting question of whether area-level predict individual-level, discussed a Xie reference
ADI and race/ethnicity (Tipirneni et al 2022 and Rolling 2023 both discuss related legal/political issues)
5.2 Limitations
Preliminary nature of analyses, see XX for future directions
Relating to the literature - full literature review was not performed
Restricted to CDC micropolitan and metropolitan CSBAs. Rollings et al 2023 find that urban areas had poorer agreement between SVI and ADI.
Differing data source years
Cross-sectional/observational only (causal inference) (cf. Lotfata et al 2023)
Asthma prevalence not exarcerbations (and access to care required for doctor to have said have asthma)
Aggregating ADI to county level
- There are several limitations of the project. One clear limitation is that the analyses will be restricted to areas of the US which are included in a CBSA grouping included in the SMART BRFSS dataset. As well as posing a methodological challenge, the differing geographic units introduce limitations for interpretation.
Assumptions of regression analyses
- Linear relationships (cf. Lotfata et al 2023)
Construction of regression analyses
Additional individual level factors
Allowing contributions of predictors to vary by region (cf. Lotfata et al 2023)
Covariate multicollinearity
Missing predictors
Poor fits
Appropriateness of datasets for question
- For national county-level could have used aggregated BRFSS dataset
- Slight mismatches by yea
5.3 Future directions
6 References
Centers for Disease Control and Prevention (CDC). Behavioral Risk Factor Surveillance System Survey Questionnaire. Atlanta, Georgia: U.S. Department of Health and Human Services, Centers for Disease Control and Prevention, 2021.
Centers for Disease Control and Prevention (CDC). Behavioral Risk Factor Surveillance System Survey Data. Atlanta, Georgia: U.S. Department of Health and Human Services, Centers for Disease Control and Prevention, 2021.
Centers for Disease Control and Prevention/ Agency for Toxic Substances and Disease Registry/ Geospatial Research, Analysis, and Services Program. CDC/ATSDR Social Vulnerability Index 2020 Database US. https://www.atsdr.cdc.gov/placeandhealth/svi/data_documentation_download.html. Accessed on 2023/11/04.
https://www.census.gov/programs-surveys/metro-micro/about/glossary.html
https://www.cdc.gov/asthma/most_recent_national_asthma_data.htm.
Kind AJH, Buckingham W. Making Neighborhood Disadvantage Metrics Accessible: The Neighborhood Atlas. New England Journal of Medicine, 2018. 378: 2456-2458. DOI: 10.1056/NEJMp1802313. PMCID: PMC6051533.
Lotfata A, Moosazadeh M, Helbich M, Hoseini B. Socioeconomic and environmental determinants of asthma prevalence: a cross-sectional study at the U.S. County level using geographically weighted random forests. Int J Health Geogr. 2023 Aug 10;22(1):18. doi: 10.1186/s12942-023-00343-6. PMID: 37563691; PMCID: PMC10413687.
Rollings KA, Noppert GA, Griggs JJ, Melendez RA, Clarke PJ. Comparison of two area-level socioeconomic deprivation indices: Implications for public health research, practice, and policy. PLoS One. 2023 Oct 5;18(10):e0292281. doi: 10.1371/journal.pone.0292281. PMID: 37797080; PMCID: PMC10553799.
Tipirneni R, Schmidt H, Lantz PM, Karmakar M. Associations of 4 Geographic Social Vulnerability Indices With US COVID-19 Incidence and Mortality. Am J Public Health. 2022 Nov;112(11):1584-1588. doi: 10.2105/AJPH.2022.307018. Epub 2022 Sep 15. PMID: 36108250; PMCID: PMC9558191.
University of Wisconsin School of Medicine and Public Health. 2021 Area Deprivation Index v4.0.1. Downloaded from https://www.neighborhoodatlas.medicine.wisc.edu/ 2023/11/04
Xie S, Himes BE. Approaches to Link Geospatially Varying Social, Economic, and Environmental Factors with Electronic Health Record Data to Better Understand Asthma Exacerbations. AMIA Annu Symp Proc. 2018 Dec 5;2018:1561-1570. PMID: 30815202; PMCID: PMC6371292.
Xie S, Hubbard RA, Himes BE. Neighborhood-level measures of socioeconomic status are more correlated with individual-level measures in urban areas compared with less urban areas. Ann Epidemiol. 2020 Mar;43:37-43.e4. doi: 10.1016/j.annepidem.2020.01.012. Epub 2020 Feb 11. PMID: 32151518; PMCID: PMC7160852.
6.1 Coding resources
7 Appendices
7.1 Session Info
sessionInfo()R version 4.3.1 (2023-06-16 ucrt)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 11 x64 (build 22621)
Matrix products: default
locale:
[1] LC_COLLATE=English_United States.utf8
[2] LC_CTYPE=English_United States.utf8
[3] LC_MONETARY=English_United States.utf8
[4] LC_NUMERIC=C
[5] LC_TIME=English_United States.utf8
time zone: America/New_York
tzcode source: internal
attached base packages:
[1] grid stats graphics grDevices utils datasets methods
[8] base
other attached packages:
[1] srvyr_1.2.0 survey_4.2-1 survival_3.5-5
[4] lme4_1.1-34 Matrix_1.6-1.1 cowplot_1.1.1
[7] gtsummary_1.7.2 modelsummary_1.4.2 broom.mixed_0.2.9.4
[10] kableExtra_1.3.4 RColorBrewer_1.1-3 gridExtra_2.3
[13] leaflet_2.2.0 sf_1.0-14 tidycensus_1.5
[16] tigris_2.0.4 readxl_1.4.3 lubridate_1.9.2
[19] forcats_1.0.0 stringr_1.5.0 dplyr_1.1.2
[22] purrr_1.0.2 readr_2.1.4 tidyr_1.3.0
[25] tibble_3.2.1 ggplot2_3.4.3 tidyverse_2.0.0
[28] haven_2.5.3
loaded via a namespace (and not attached):
[1] rstudioapi_0.15.0 jsonlite_1.8.7 wk_0.8.0
[4] datawizard_0.9.0 magrittr_2.0.3 estimability_1.4.1
[7] farver_2.1.1 nloptr_2.0.3 rmarkdown_2.24
[10] ragg_1.2.5 vctrs_0.6.3 minqa_1.2.6
[13] webshot_0.5.5 htmltools_0.5.6 broom_1.0.5
[16] cellranger_1.1.0 s2_1.1.4 sass_0.4.7
[19] parallelly_1.36.0 KernSmooth_2.23-21 htmlwidgets_1.6.2
[22] plyr_1.8.9 emmeans_1.8.8 gt_0.9.0
[25] uuid_1.1-1 commonmark_1.9.0 lifecycle_1.0.3
[28] pkgconfig_2.0.3 R6_2.5.1 fastmap_1.1.1
[31] future_1.33.0 digest_0.6.33 colorspace_2.1-0
[34] furrr_0.3.1 textshaping_0.3.6 crosstalk_1.2.0
[37] labeling_0.4.3 fansi_1.0.4 timechange_0.2.0
[40] httr_1.4.7 mgcv_1.8-42 compiler_4.3.1
[43] proxy_0.4-27 bit64_4.0.5 withr_2.5.1
[46] backports_1.4.1 viridis_0.6.4 DBI_1.1.3
[49] performance_0.10.5 MASS_7.3-60 rappdirs_0.3.3
[52] classInt_0.4-10 tools_4.3.1 units_0.8-4
[55] future.apply_1.11.0 glue_1.6.2 nlme_3.1-162
[58] checkmate_2.2.0 reshape2_1.4.4 generics_0.1.3
[61] gtable_0.3.4 leaflet.providers_2.0.0 labelled_2.12.0
[64] tzdb_0.4.0 class_7.3-22 hms_1.1.3
[67] xml2_1.3.5 utf8_1.2.3 tables_0.9.17
[70] markdown_1.8 pillar_1.9.0 vroom_1.6.3
[73] mitools_2.4 splines_4.3.1 lattice_0.21-8
[76] bit_4.0.5 tidyselect_1.2.0 knitr_1.43
[79] svglite_2.1.1 xfun_0.40 DT_0.30
[82] stringi_1.7.12 rematch_1.0.1 yaml_2.3.7
[85] boot_1.3-28.1 evaluate_0.21 codetools_0.2-19
[88] cli_3.6.1 xtable_1.8-4 parameters_0.21.2
[91] systemfonts_1.0.4 munsell_0.5.0 jquerylib_0.1.4
[94] Rcpp_1.0.11 globals_0.16.2 coda_0.19-4
[97] parallel_4.3.1 ellipsis_0.3.2 bayestestR_0.13.1
[100] listenv_0.9.0 mvtnorm_1.2-3 viridisLite_0.4.2
[103] broom.helpers_1.14.0 scales_1.2.1 e1071_1.7-13
[106] insight_0.19.5 crayon_1.5.2 rlang_1.1.1
[109] rvest_1.0.3
7.2 SMART BRFSS CDC dataset
mmsa_colnames_all |>
kable()| colname | label |
|---|---|
| DISPCODE | FINAL DISPOSITION |
| STATERE1 | RESIDENT OF STATE |
| CELPHON1 | CELLULAR TELEPHONE |
| LADULT1 | ARE YOU 18 YEARS OF AGE OR OLDER? |
| COLGSEX | ARE YOU MALE OR FEMALE? |
| LANDSEX | ARE YOU MALE OR FEMALE? |
| RESPSLCT | RESPONDENT SELECTION |
| SAFETIME | SAFE TIME TO TALK? |
| CADULT1 | ARE YOU 18 YEARS OF AGE OR OLDER? |
| CELLSEX | ARE YOU MALE OR FEMALE? |
| HHADULT | NUMBER OF ADULTS IN HOUSEHOLD |
| SEXVAR | SEX OF RESPONDENT |
| GENHLTH | GENERAL HEALTH |
| PHYSHLTH | NUMBER OF DAYS PHYSICAL HEALTH NOT GOOD |
| MENTHLTH | NUMBER OF DAYS MENTAL HEALTH NOT GOOD |
| POORHLTH | POOR PHYSICAL OR MENTAL HEALTH |
| PRIMINSR | WHAT IS PRIMARY SOURCE OF HEALTH INSURAN |
| PERSDOC3 | HAVE PERSONAL HEALTH CARE PROVIDER? |
| MEDCOST1 | COULD NOT AFFORD TO SEE DOCTOR |
| CHECKUP1 | LENGTH OF TIME SINCE LAST ROUTINE CHECKU |
| EXERANY2 | EXERCISE IN PAST 30 DAYS |
| BPHIGH6 | EVER TOLD BLOOD PRESSURE HIGH |
| BPMEDS | CURRENTLY TAKING BLOOD PRESSURE MEDICATI |
| CHOLCHK3 | HOW LONG SINCE CHOLESTEROL CHECKED |
| TOLDHI3 | EVER TOLD CHOLESTEROL IS HIGH |
| CHOLMED3 | CURRENTLY TAKING MEDICINE FOR HIGH CHOLE |
| CVDINFR4 | EVER DIAGNOSED WITH HEART ATTACK |
| CVDCRHD4 | EVER DIAGNOSED WITH ANGINA OR CORONARY H |
| CVDSTRK3 | EVER DIAGNOSED WITH A STROKE |
| ASTHMA3 | EVER TOLD HAD ASTHMA |
| ASTHNOW | STILL HAVE ASTHMA |
| CHCSCNCR | (EVER TOLD) YOU HAD SKIN CANCER? |
| CHCOCNCR | (EVER TOLD) YOU HAD ANY OTHER TYPES OF C |
| CHCCOPD3 | EVER TOLD YOU HAD C.O.P.D. EMPHYSEMA OR |
| ADDEPEV3 | (EVER TOLD) YOU HAD A DEPRESSIVE DISORDE |
| CHCKDNY2 | EVER TOLD YOU HAVE KIDNEY DISEASE? |
| DIABETE4 | (EVER TOLD) YOU HAD DIABETES |
| DIABAGE3 | AGE WHEN TOLD DIABETES |
| HAVARTH5 | TOLD HAVE ARTHRITIS |
| ARTHEXER | DR. SUGGEST USE OF PHYSICAL ACTIVITY OR |
| ARTHEDU | EVER TAKEN CLASS IN MANAGING ARTHRITIS O |
| LMTJOIN3 | LIMITED BECAUSE OF JOINT SYMPTOMS |
| ARTHDIS2 | DOES ARTHRITIS AFFECT WHETHER YOU WORK |
| JOINPAI2 | HOW BAD WAS JOINT PAIN |
| MARITAL | MARITAL STATUS |
| EDUCA | EDUCATION LEVEL |
| RENTHOM1 | OWN OR RENT HOME |
| NUMHHOL3 | HOUSEHOLD TELEPHONES |
| NUMPHON3 | RESIDENTIAL PHONES |
| CPDEMO1B | DO YOU HAVE A CELL PHONE FOR PERSONAL US |
| VETERAN3 | ARE YOU A VETERAN |
| EMPLOY1 | EMPLOYMENT STATUS |
| CHILDREN | NUMBER OF CHILDREN IN HOUSEHOLD |
| INCOME3 | INCOME LEVEL |
| PREGNANT | PREGNANCY STATUS |
| WEIGHT2 | REPORTED WEIGHT IN POUNDS |
| HEIGHT3 | REPORTED HEIGHT IN FEET AND INCHES |
| DEAF | ARE YOU DEAF OR DO YOU HAVE SERIOUS DIFF |
| BLIND | BLIND OR DIFFICULTY SEEING |
| DECIDE | DIFFICULTY CONCENTRATING OR REMEMBERING |
| DIFFWALK | DIFFICULTY WALKING OR CLIMBING STAIRS |
| DIFFDRES | DIFFICULTY DRESSING OR BATHING |
| DIFFALON | DIFFICULTY DOING ERRANDS ALONE |
| SMOKE100 | SMOKED AT LEAST 100 CIGARETTES |
| SMOKDAY2 | FREQUENCY OF DAYS NOW SMOKING |
| USENOW3 | USE OF SMOKELESS TOBACCO PRODUCTS |
| ECIGNOW1 | DO YOU NOW USE E-CIGARETTES, EVERY DAY, |
| ALCDAY5 | DAYS IN PAST 30 HAD ALCOHOLIC BEVERAGE |
| AVEDRNK3 | AVG ALCOHOLIC DRINKS PER DAY IN PAST 30 |
| DRNK3GE5 | BINGE DRINKING |
| MAXDRNKS | MOST DRINKS ON SINGLE OCCASION PAST 30 D |
| FLUSHOT7 | ADULT FLU SHOT/SPRAY PAST 12 MOS |
| FLSHTMY3 | WHEN RECEIVED MOST RECENT SEASONAL FLU S |
| IMFVPLA2 | WHERE DID YOU GET YOUR LAST FLU SHOT/VAC |
| PNEUVAC4 | PNEUMONIA SHOT EVER |
| HIVTST7 | EVER TESTED H.I.V. |
| HIVTSTD3 | MONTH AND YEAR OF LAST HIV TEST |
| FRUIT2 | HOW MANY TIMES DID YOU EAT FRUIT? |
| FRUITJU2 | HOW MANY TIMES DID YOU DRINK 100 PERCENT |
| FVGREEN1 | HOW MANY TIMES DID YOU EAT DARK GREEN VE |
| FRENCHF1 | HOW OFTEN DO YOU EAT FRENCH FRIES OR FRI |
| POTATOE1 | HOW OFTEN DO YOU EAT FRENCH FRIES OR FRI |
| VEGETAB2 | HOW OFTEN DO YOU EAT FRENCH FRIES OR FRI |
| _STSTR | SAMPLE DESIGN STRATIFICATION VARIABLE |
| _IMPSEX | IMPUTED GENDER |
| CAGEG | FOUR LEVEL CHILD AGE |
| _RFHLTH | ADULTS WITH GOOD OR BETTER HEALTH |
| _PHYS14D | COMPUTED PHYSICAL HEALTH STATUS |
| _MENT14D | COMPUTED MENTAL HEALTH STATUS |
| _HLTHPLN | HAVE ANY HEALTH INSURANCE |
| _HCVU652 | RESPONDENTS AGED 18-64 WITH HEALTH INSUR |
| _TOTINDA | LEISURE TIME PHYSICAL ACTIVITY CALCULATE |
| _RFHYPE6 | HIGH BLOOD PRESSURE CALCULATED VARIABLE |
| _CHOLCH3 | CHOLESTEROL CHECKED CALCULATED VARIABLE |
| _RFCHOL3 | HIGH CHOLESTEROL CALCULATED VARIABLE |
| _MICHD | RESPONDENTS THAT HAVE EVER REPORTED HAVI |
| _LTASTH1 | LIFETIME ASTHMA CALCULATED VARIABLE |
| _CASTHM1 | CURRENT ASTHMA CALCULATED VARIABLE |
| _ASTHMS1 | COMPUTED ASTHMA STATUS |
| _DRDXAR3 | RESPONDENTS DIAGNOSED WITH ARTHRITIS |
| _LMTACT3 | LIMITED USUAL ACTIVITIES |
| _LMTWRK3 | LIMITED WORK ACTIVITIES |
| _PRACE1 | COMPUTED PREFERRED RACE |
| _MRACE1 | CALCULATED NON-HISPANIC RACE INCLUDING M |
| _HISPANC | HISPANIC, LATINO/A, OR SPANISH ORIGIN CA |
| _RACE | COMPUTED RACE-ETHNICITY GROUPING |
| _RACEG21 | COMPUTED NON-HISPANIC WHITES/ALL OTHERS |
| _RACEGR3 | COMPUTED FIVE LEVEL RACE/ETHNICITY CATEG |
| _RACEPRV | COMPUTED RACE GROUPS USED FOR INTERNET P |
| _SEX | CALCULATED SEX VARIABLE |
| _AGEG5YR | REPORTED AGE IN FIVE-YEAR AGE CATEGORIES |
| _AGE65YR | REPORTED AGE IN TWO AGE GROUPS CALCULATE |
| _AGE80 | IMPUTED AGE VALUE COLLAPSED ABOVE 80 |
| _AGE_G | IMPUTED AGE IN SIX GROUPS |
| WTKG3 | COMPUTED WEIGHT IN KILOGRAMS |
| _BMI5 | COMPUTED BODY MASS INDEX |
| _BMI5CAT | COMPUTED BODY MASS INDEX CATEGORIES |
| _RFBMI5 | OVERWEIGHT OR OBESE CALCULATED VARIABLE |
| _EDUCAG | COMPUTED LEVEL OF EDUCATION COMPLETED CA |
| _INCOMG1 | COMPUTED INCOME CATEGORIES |
| _SMOKER3 | COMPUTED SMOKING STATUS |
| _RFSMOK3 | CURRENT SMOKING CALCULATED VARIABLE |
| _CURECI1 | CURRENT E-CIGARETTE USER CALCULATED VARI |
| DRNKANY5 | DRINK ANY ALCOHOLIC BEVERAGES IN PAST 30 |
| _RFBING5 | BINGE DRINKING CALCULATED VARIABLE |
| _DRNKWK1 | COMPUTED NUMBER OF DRINKS OF ALCOHOL BEV |
| _RFDRHV7 | HEAVY ALCOHOL CONSUMPTION CALCULATED VA |
| _FLSHOT7 | FLU SHOT CALCULATED VARIABLE |
| _PNEUMO3 | PNEUMONIA VACCINATION CALCULATED VARIABL |
| _AIDTST4 | EVER BEEN TESTED FOR HIV CALCULATED VARI |
| FTJUDA2_ | COMPUTED FRUIT JUICE INTAKE IN TIMES PER |
| FRUTDA2_ | COMPUTED FRUIT INTAKE IN TIMES PER DAY |
| GRENDA1_ | COMPUTED DARK GREEN VEGETABLE INTAKE IN |
| FRNCHDA_ | FRENCH FRY INTAKE IN TIMES PER DAY |
| POTADA1_ | COMPUTED POTATO SERVINGS PER DAY |
| VEGEDA2_ | COMPUTED OTHER VEGETABLE INTAKE IN TIMES |
| _MISFRT1 | THE NUMBER OF MISSING FRUIT RESPONSES |
| _MISVEG1 | THE NUMBER OF MISSING VEGETABLE RESPONSE |
| _FRTRES1 | MISSING ANY FRUIT RESPONSES |
| _VEGRES1 | MISSING ANY VEGETABLE RESPONSES |
| _FRUTSU1 | TOTAL FRUITS CONSUMED PER DAY |
| _VEGESU1 | TOTAL VEGETABLES CONSUMED PER DAY |
| _FRTLT1A | CONSUME FRUIT 1 OR MORE TIMES PER DAY |
| _VEGLT1A | CONSUME VEGETABLES 1 OR MORE TIMES PER D |
| _FRT16A | REPORTED CONSUMING FRUIT >16/DAY |
| _VEG23A | REPORTED CONSUMING VEGETABLES >23/DAY |
| _FRUITE1 | FRUIT EXCLUSION FROM ANALYSES |
| _VEGETE1 | VEGETABLE EXCLUSION FROM ANALYSES |
| _MMSA | THE CODE OF METROPOLITAN OR MICROPOLITAN STATISTICAL AREA WHERE THE RESPONDENT LIVES |
| _MMSAWT | THE MMSA-LEVEL WEIGHT THAT IS USED WHEN GENERATING MMSA-LEVEL ESTIMATES FOR VARIABLES IN THE DATA SET |
| SEQNO | SEQUENCE NUMBER |
| MMSANAME | THE MMSA NAME |
| _resp_id | RESPONDENT ID |
7.2.1 Selected BRFSS variables
The table below summarizes the variables of interest, combining information from https://www.cdc.gov/brfss/annual_data/2021/summary_matrix_21.html and https://www.cdc.gov/brfss/annual_data/2021/pdf/2021-calculated-variables-version4-508.pdf
| Variable | Description or Result of Calculation |
Values | ||||||||||||||||||
| _MMSA | The code of metropolitan or micropolitan statistical area where the respondent lives | 126 unique five digit character strings. | ||||||||||||||||||
| _SEX | Calculated sex variable. _SEX is derived from BIRTHSEX and SEXVAR. |
|
||||||||||||||||||
| _RACE | Combined variable for race and ethnicity. _RACE is derived from _MRACE1 and _HISPANC. All respondents who reported they are of Hispanic or Latino origin are coded as Hispanic. |
|
||||||||||||||||||
| _AGE_G | Calculated variable for six-level imputed age category (18-64 and 65+ 10 year age groupings). _AGE_G is derived from _IMPAGE (imputed age). |
|
||||||||||||||||||
| _LTASTH1 | CV for lifetime asthma prevalence / Calculated variable for adults who have ever been told they have asthma. _LTASTH1 is derived from ASTHMA3. |
|
||||||||||||||||||
| _SMOKER3 | Smoking Status: Everyday smoker, someday smoker, former smoker and non smoker / Calculated variable for four-level smoker status: everyday smoker, someday smoker, former smoker, non-smoker. _SMOKER3 is derived from SMOKE100 and SMOKDAY2. |
|
Footnotes
With guidance from Dr. Blanca Himes and Dr. Sherrie Xie, I began work on this project prior to the publication of and my awareness of Rollings et al 2023. My title references its title “Comparison of two area-level socioeconomic deprivation indices: Implications for public health research, practice, and policy”.↩︎